[InstCombine][X86] Added extra tests introduced for D17490

llvm-svn: 266732
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-04-19 12:59:52 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-04-19 12:59:52 +0000
commit: 998cffa6b921a469e892269e0961a8eec69bf3fe (patch)
tree: 60a6eb74bdf7ee5b37378b05850ea302599c62a1
parent: 74b3bfdf71af1b6fb877bfc703244af093df4b3d (diff)
download: bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.tar.gz
bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.zip
4 files changed, 578 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/x86-sse.ll b/llvm/test/Transforms/InstCombine/x86-sse.ll
index 08509dbd717..80759ae6601 100644
--- a/llvm/test/Transforms/InstCombine/x86-sse.ll
+++ b/llvm/test/Transforms/InstCombine/x86-sse.ll
@@ -21,6 +21,25 @@ define float @test_rcp_ss_0(float %a) {
   ret float %6
 }
 
+define float @test_rcp_ss_1(float %a) {
+; CHECK-LABEL: @test_rcp_ss_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 1
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 1
+  ret float %6
+}
+
 define float @test_sqrt_ss_0(float %a) {
 ; CHECK-LABEL: @test_sqrt_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -40,6 +59,25 @@ define float @test_sqrt_ss_0(float %a) {
   ret float %6
 }
 
+define float @test_sqrt_ss_2(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 2
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 2
+  ret float %6
+}
+
 define float @test_rsqrt_ss_0(float %a) {
 ; CHECK-LABEL: @test_rsqrt_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -59,6 +97,40 @@ define float @test_rsqrt_ss_0(float %a) {
   ret float %6
 }
 
+define float @test_rsqrt_ss_3(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 3
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 3
+  ret float %6
+}
+
+define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_add_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_add_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_add_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -86,6 +158,42 @@ define float @test_add_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_add_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+; CHECK-NEXT:    ret float [[TMP7]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
+define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_sub_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_sub_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_sub_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
@@ -104,6 +212,37 @@ define float @test_sub_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_sub_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> <float undef, float undef, float 2.000000e+00, float undef>, <4 x float> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 2
+  ret float %7
+}
+
+define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_mul_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_mul_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_mul_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
@@ -122,6 +261,37 @@ define float @test_mul_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_mul_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> <float undef, float undef, float undef, float 3.000000e+00>, <4 x float> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 3
+  ret float %7
+}
+
+define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_div_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_div_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_div_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -149,6 +319,42 @@ define float @test_div_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_div_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+; CHECK-NEXT:    ret float [[TMP7]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
+define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_min_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_min_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_min_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -170,6 +376,37 @@ define float @test_min_ss_0(float %a, float %b) {
   ret float %10
 }
 
+define float @test_min_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float undef, float undef, float 2.000000e+00, float undef>, <4 x float> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 2
+  ret float %7
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> [[TMP3]])
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_max_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_max_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -191,6 +428,37 @@ define float @test_max_ss_0(float %a, float %b) {
   ret float %10
 }
 
+define float @test_max_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> <float undef, float undef, float undef, float 3.000000e+00>, <4 x float> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 3
+  ret float %7
+}
+
+define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_cmp_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> [[TMP3]], i8 0)
+; CHECK-NEXT:    ret <4 x float> [[TMP4]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
+  ret <4 x float> %4
+}
+
 define float @test_cmp_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_cmp_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -218,6 +486,27 @@ define float @test_cmp_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_cmp_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]], i8 0)
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+; CHECK-NEXT:    ret float [[TMP7]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
 define i32 @test_comieq_ss_0(float %a, float %b) {
 ; CHECK-LABEL: @test_comieq_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/x86-sse2.ll b/llvm/test/Transforms/InstCombine/x86-sse2.ll
index 3a4b49c4a23..0e163b4ec75 100644
--- a/llvm/test/Transforms/InstCombine/x86-sse2.ll
+++ b/llvm/test/Transforms/InstCombine/x86-sse2.ll
@@ -17,6 +17,32 @@ define double @test_sqrt_sd_0(double %a) {
   ret double %4
 }
 
+define double @test_sqrt_sd_1(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 1
+  ret double %4
+}
+
+define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_add_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_add_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_add_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -36,6 +62,36 @@ define double @test_add_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_add_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    ret double [[TMP6]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_sub_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_sub_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_sub_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub double %a, %b
@@ -50,6 +106,32 @@ define double @test_sub_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_sub_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_mul_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_mul_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_mul_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul double %a, %b
@@ -64,6 +146,32 @@ define double @test_mul_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_mul_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_div_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_div_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_div_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -83,6 +191,36 @@ define double @test_div_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_div_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    ret double [[TMP6]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_min_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_min_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_min_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -100,6 +238,32 @@ define double @test_min_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_min_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_max_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_max_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -117,6 +281,32 @@ define double @test_max_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_max_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    ret double [[TMP2]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_cmp_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> [[TMP1]], i8 0)
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
+  ret <2 x double> %2
+}
+
 define double @test_cmp_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_cmp_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -136,6 +326,25 @@ define double @test_cmp_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_cmp_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    ret double [[TMP6]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
 define i32 @test_comieq_sd_0(double %a, double %b) {
 ; CHECK-LABEL: @test_comieq_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
diff --git a/llvm/test/Transforms/InstCombine/x86-sse41.ll b/llvm/test/Transforms/InstCombine/x86-sse41.ll
index c109410e854..0441d41c94d 100644
--- a/llvm/test/Transforms/InstCombine/x86-sse41.ll
+++ b/llvm/test/Transforms/InstCombine/x86-sse41.ll
@@ -34,6 +34,25 @@ define double @test_round_sd_0(double %a, double %b) {
   ret double %6
 }
 
+define double @test_round_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    ret double [[TMP6]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
 define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test_round_ss(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1
@@ -82,5 +101,32 @@ define float @test_round_ss_0(float %a, float %b) {
   ret float %r
 }
 
+define float @test_round_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10)
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
+; CHECK-NEXT:    ret float [[R]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+  %r = extractelement <4 x float> %9, i32 2
+  ret float %r
+}
+
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll
index 90b0a841eed..132ad55fc64 100644
--- a/llvm/test/Transforms/InstCombine/x86-xop.ll
+++ b/llvm/test/Transforms/InstCombine/x86-xop.ll
@@ -16,6 +16,21 @@ define double @test_vfrcz_sd_0(double %a) {
   ret double %4
 }
 
+define double @test_vfrcz_sd_1(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 1
+  ret double %4
+}
+
 define float @test_vfrcz_ss_0(float %a) {
 ; CHECK-LABEL: @test_vfrcz_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
@@ -35,6 +50,25 @@ define float @test_vfrcz_ss_0(float %a) {
   ret float %6
 }
 
+define float @test_vfrcz_ss_3(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_3(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 3
+; CHECK-NEXT:    ret float [[TMP6]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 3
+  ret float %6
+}
+
 define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @cmp_slt_v2i64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-04-19 12:59:52 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-04-19 12:59:52 +0000
commit	998cffa6b921a469e892269e0961a8eec69bf3fe (patch)
tree	60a6eb74bdf7ee5b37378b05850ea302599c62a1
parent	74b3bfdf71af1b6fb877bfc703244af093df4b3d (diff)
download	bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.tar.gz bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.zip