diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-19 12:59:52 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-04-19 12:59:52 +0000 |
commit | 998cffa6b921a469e892269e0961a8eec69bf3fe (patch) | |
tree | 60a6eb74bdf7ee5b37378b05850ea302599c62a1 | |
parent | 74b3bfdf71af1b6fb877bfc703244af093df4b3d (diff) | |
download | bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.tar.gz bcm5719-llvm-998cffa6b921a469e892269e0961a8eec69bf3fe.zip |
[InstCombine][X86] Added extra tests introduced for D17490
llvm-svn: 266732
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-sse.ll | 289 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-sse2.ll | 209 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-sse41.ll | 46 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-xop.ll | 34 |
4 files changed, 578 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/x86-sse.ll b/llvm/test/Transforms/InstCombine/x86-sse.ll index 08509dbd717..80759ae6601 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse.ll @@ -21,6 +21,25 @@ define float @test_rcp_ss_0(float %a) { ret float %6 } +define float @test_rcp_ss_1(float %a) { +; CHECK-LABEL: @test_rcp_ss_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 1 +; CHECK-NEXT: ret float [[TMP6]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4) + %6 = extractelement <4 x float> %5, i32 1 + ret float %6 +} + define float @test_sqrt_ss_0(float %a) { ; CHECK-LABEL: @test_sqrt_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -40,6 +59,25 @@ define float @test_sqrt_ss_0(float %a) { ret float %6 } +define float @test_sqrt_ss_2(float %a) { +; CHECK-LABEL: @test_sqrt_ss_2( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 2 +; CHECK-NEXT: ret float [[TMP6]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4) + %6 = extractelement <4 x float> %5, i32 2 + ret float %6 +} + define float @test_rsqrt_ss_0(float %a) { ; CHECK-LABEL: @test_rsqrt_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -59,6 +97,40 @@ define float @test_rsqrt_ss_0(float %a) { ret float %6 } +define float @test_rsqrt_ss_3(float %a) { +; CHECK-LABEL: @test_rsqrt_ss_3( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 3 +; CHECK-NEXT: ret float [[TMP6]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4) + %6 = extractelement <4 x float> %5, i32 3 + ret float %6 +} + +define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_add_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_add_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_add_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -86,6 +158,42 @@ define float @test_add_ss_0(float %a, float %b) { ret float %r } +define float @test_add_ss_1(float %a, float %b) { +; CHECK-LABEL: @test_add_ss_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +; CHECK-NEXT: ret float [[TMP7]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 1 + ret float %7 +} + +define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_sub_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_sub_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_sub_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b @@ -104,6 +212,37 @@ define float @test_sub_ss_0(float %a, float %b) { ret float %r } +define float @test_sub_ss_2(float %a, float %b) { +; CHECK-LABEL: @test_sub_ss_2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> <float undef, float undef, float 2.000000e+00, float undef>, <4 x float> undef) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 +; CHECK-NEXT: ret float [[TMP2]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 2 + ret float %7 +} + +define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_mul_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_mul_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_mul_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b @@ -122,6 +261,37 @@ define float @test_mul_ss_0(float %a, float %b) { ret float %r } +define float @test_mul_ss_3(float %a, float %b) { +; CHECK-LABEL: @test_mul_ss_3( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> <float undef, float undef, float undef, float 3.000000e+00>, <4 x float> undef) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +; CHECK-NEXT: ret float [[TMP2]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 3 + ret float %7 +} + +define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_div_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_div_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_div_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -149,6 +319,42 @@ define float @test_div_ss_0(float %a, float %b) { ret float %r } +define float @test_div_ss_1(float %a, float %b) { +; CHECK-LABEL: @test_div_ss_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +; CHECK-NEXT: ret float [[TMP7]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 1 + ret float %7 +} + +define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_min_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_min_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_min_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -170,6 +376,37 @@ define float @test_min_ss_0(float %a, float %b) { ret float %10 } +define float @test_min_ss_2(float %a, float %b) { +; CHECK-LABEL: @test_min_ss_2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float undef, float undef, float 2.000000e+00, float undef>, <4 x float> undef) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 +; CHECK-NEXT: ret float [[TMP2]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 2 + ret float %7 +} + +define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3) + ret <4 x float> %4 +} + define float @test_max_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_max_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -191,6 +428,37 @@ define float @test_max_ss_0(float %a, float %b) { ret float %10 } +define float @test_max_ss_3(float %a, float %b) { +; CHECK-LABEL: @test_max_ss_3( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> <float undef, float undef, float undef, float 3.000000e+00>, <4 x float> undef) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +; CHECK-NEXT: ret float [[TMP2]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5) + %7 = extractelement <4 x float> %6, i32 3 + ret float %7 +} + +define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test_cmp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> [[TMP3]], i8 0) +; CHECK-NEXT: ret <4 x float> [[TMP4]] +; + %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0) + ret <4 x float> %4 +} + define float @test_cmp_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_cmp_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -218,6 +486,27 @@ define float @test_cmp_ss_0(float %a, float %b) { ret float %r } +define float @test_cmp_ss_1(float %a, float %b) { +; CHECK-LABEL: @test_cmp_ss_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP5]], i8 0) +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 1 +; CHECK-NEXT: ret float [[TMP7]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0) + %7 = extractelement <4 x float> %6, i32 1 + ret float %7 +} + define i32 @test_comieq_ss_0(float %a, float %b) { ; CHECK-LABEL: @test_comieq_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 diff --git a/llvm/test/Transforms/InstCombine/x86-sse2.ll b/llvm/test/Transforms/InstCombine/x86-sse2.ll index 3a4b49c4a23..0e163b4ec75 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse2.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse2.ll @@ -17,6 +17,32 @@ define double @test_sqrt_sd_0(double %a) { ret double %4 } +define double @test_sqrt_sd_1(double %a) { +; CHECK-LABEL: @test_sqrt_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: ret double [[TMP4]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) + %4 = extractelement <2 x double> %3, i32 1 + ret double %4 +} + +define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_add_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_add_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_add_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -36,6 +62,36 @@ define double @test_add_sd_0(double %a, double %b) { ret double %6 } +define double @test_add_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_add_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret double [[TMP6]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_sub_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_sub_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_sub_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b @@ -50,6 +106,32 @@ define double @test_sub_sd_0(double %a, double %b) { ret double %6 } +define double @test_sub_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_sub_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_mul_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_mul_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_mul_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b @@ -64,6 +146,32 @@ define double @test_mul_sd_0(double %a, double %b) { ret double %6 } +define double @test_mul_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_mul_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_div_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_div_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_div_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -83,6 +191,36 @@ define double @test_div_sd_0(double %a, double %b) { ret double %6 } +define double @test_div_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_div_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret double [[TMP6]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_min_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_min_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_min_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -100,6 +238,32 @@ define double @test_min_sd_0(double %a, double %b) { ret double %6 } +define double @test_min_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_min_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> [[TMP1]]) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1) + ret <2 x double> %2 +} + define double @test_max_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_max_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -117,6 +281,32 @@ define double @test_max_sd_0(double %a, double %b) { ret double %6 } +define double @test_max_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_max_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> <double undef, double 1.000000e+00>, <2 x double> <double undef, double 2.000000e+00>) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +; CHECK-NEXT: ret double [[TMP2]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + +define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @test_cmp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> [[TMP1]], i8 0) +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0) + ret <2 x double> %2 +} + define double @test_cmp_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_cmp_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -136,6 +326,25 @@ define double @test_cmp_sd_0(double %a, double %b) { ret double %6 } +define double @test_cmp_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_cmp_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret double [[TMP6]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + define i32 @test_comieq_sd_0(double %a, double %b) { ; CHECK-LABEL: @test_comieq_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 diff --git a/llvm/test/Transforms/InstCombine/x86-sse41.ll b/llvm/test/Transforms/InstCombine/x86-sse41.ll index c109410e854..0441d41c94d 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse41.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse41.ll @@ -34,6 +34,25 @@ define double @test_round_sd_0(double %a, double %b) { ret double %6 } +define double @test_round_sd_1(double %a, double %b) { +; CHECK-LABEL: @test_round_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret double [[TMP6]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10) + %6 = extractelement <2 x double> %5, i32 1 + ret double %6 +} + define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @test_round_ss( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1 @@ -82,5 +101,32 @@ define float @test_round_ss_0(float %a, float %b) { ret float %r } +define float @test_round_ss_2(float %a, float %b) { +; CHECK-LABEL: @test_round_ss_2( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 2 +; CHECK-NEXT: ret float [[R]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10) + %r = extractelement <4 x float> %9, i32 2 + ret float %r +} + declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll index 90b0a841eed..132ad55fc64 100644 --- a/llvm/test/Transforms/InstCombine/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/x86-xop.ll @@ -16,6 +16,21 @@ define double @test_vfrcz_sd_0(double %a) { ret double %4 } +define double @test_vfrcz_sd_1(double %a) { +; CHECK-LABEL: @test_vfrcz_sd_1( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: ret double [[TMP4]] +; + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2) + %4 = extractelement <2 x double> %3, i32 1 + ret double %4 +} + define float @test_vfrcz_ss_0(float %a) { ; CHECK-LABEL: @test_vfrcz_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 @@ -35,6 +50,25 @@ define float @test_vfrcz_ss_0(float %a) { ret float %6 } +define float @test_vfrcz_ss_3(float %a) { +; CHECK-LABEL: @test_vfrcz_ss_3( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 3 +; CHECK-NEXT: ret float [[TMP6]] +; + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4) + %6 = extractelement <4 x float> %5, i32 3 + ret float %6 +} + define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: @cmp_slt_v2i64( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b |