diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll | 176 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 37 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 37 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fold-load-unops.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll | 38 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-intrinsics-x86.ll | 42 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll | 101 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 87 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse_partial_update.ll | 10 |
15 files changed, 419 insertions, 252 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index 0e445ba3a4e..2cb6449f79a 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -3018,10 +3018,12 @@ define <4 x double> @test_mm256_sqrt_pd(<4 x double> %a0) nounwind { ; X64: # %bb.0: ; X64-NEXT: vsqrtpd %ymm0, %ymm0 ; X64-NEXT: retq - %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) - ret <4 x double> %res +entry: + %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0) #2 + ret <4 x double> %0 } -declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #1 define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind { ; X32-LABEL: test_mm256_sqrt_ps: @@ -3033,10 +3035,12 @@ define <8 x float> @test_mm256_sqrt_ps(<8 x float> %a0) nounwind { ; X64: # %bb.0: ; X64-NEXT: vsqrtps %ymm0, %ymm0 ; X64-NEXT: retq - %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) - ret <8 x float> %res +entry: + %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %a0) #2 + ret <8 x float> %0 } -declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1 define void @test_mm256_store_pd(double* %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_store_pd: diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index 21612506f85..2c6571b00d0 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -6,6 +6,36 @@ ; We don't check any vinsertf128 variant with immediate 0 because that's just a blend. +define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { +; AVX-LABEL: test_x86_avx_sqrt_pd_256: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0] +; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone + +define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { +; AVX-LABEL: test_x86_avx_sqrt_ps_256: +; AVX: # %bb.0: +; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0] +; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone + define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1: ; AVX: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index ace9ae24f27..2fd2b863859 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -622,39 +622,6 @@ define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { } declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone - -define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { -; AVX-LABEL: test_x86_avx_sqrt_pd_256: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0] -; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] - ret <4 x double> %res -} -declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone - - -define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { -; AVX-LABEL: test_x86_avx_sqrt_ps_256: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0] -; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] - ret <8 x float> %res -} -declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone - - define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) { ; AVX-LABEL: test_x86_avx_vpermilvar_pd: ; AVX: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index b119dbcdc9c..b12493a8918 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -6375,6 +6375,182 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) # declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #8 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #8 +define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) { +; X32-LABEL: test_mm_mask_sqrt_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtpd %xmm1, %xmm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_mask_sqrt_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtpd %xmm1, %xmm0 {%k1} +; X64-NEXT: retq +entry: + %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> %__W + ret <2 x double> %2 +} + +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) + +define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) { +; X32-LABEL: test_mm_maskz_sqrt_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_maskz_sqrt_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %0 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %2 = select <2 x i1> %extract.i, <2 x double> %0, <2 x double> zeroinitializer + ret <2 x double> %2 +} + +define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) { +; X32-LABEL: test_mm256_mask_sqrt_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtpd %ymm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_sqrt_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtpd %ymm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> %__W + ret <4 x double> %2 +} + +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) + +define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) { +; X32-LABEL: test_mm256_maskz_sqrt_pd: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_sqrt_pd: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %0 = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x double> %0, <4 x double> zeroinitializer + ret <4 x double> %2 +} + +define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) { +; X32-LABEL: test_mm_mask_sqrt_ps: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtps %xmm1, %xmm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_mask_sqrt_ps: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtps %xmm1, %xmm0 {%k1} +; X64-NEXT: retq +entry: + %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> %__W + ret <4 x float> %2 +} + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) { +; X32-LABEL: test_mm_maskz_sqrt_ps: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm_maskz_sqrt_ps: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %0 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = select <4 x i1> %extract.i, <4 x float> %0, <4 x float> zeroinitializer + ret <4 x float> %2 +} + +define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) { +; X32-LABEL: test_mm256_mask_sqrt_ps: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtps %ymm1, %ymm0 {%k1} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_mask_sqrt_ps: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtps %ymm1, %ymm0 {%k1} +; X64-NEXT: retq +entry: + %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %__W + ret <8 x float> %2 +} + +define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) { +; X32-LABEL: test_mm256_maskz_sqrt_ps: +; X32: # %bb.0: # %entry +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: kmovw %eax, %k1 +; X32-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} +; X32-NEXT: retl +; +; X64-LABEL: test_mm256_maskz_sqrt_ps: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} +; X64-NEXT: retq +entry: + %0 = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %__A) #2 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer + ret <8 x float> %2 +} + +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) + +declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) +declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8) declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index de021a369ac..a13148599d1 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -12070,3 +12070,40 @@ define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) { %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1) ret <8 x i32> %res } + +define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { +; X86-LABEL: test_sqrt_pd_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_sqrt_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + +define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { +; X86-LABEL: test_sqrt_ps_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_sqrt_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 19de714ce8f..6b4b68fb4c1 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -999,43 +999,6 @@ define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 % } declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) -define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { -; X86-LABEL: test_sqrt_pd_256: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_sqrt_pd_256: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) - ret <4 x double> %res -} -declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone - -define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { -; X86-LABEL: test_sqrt_ps_256: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_sqrt_ps_256: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) - ret <8 x float> %res -} - -declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone - define <4 x double> @test_getexp_pd_256(<4 x double> %a0) { ; CHECK-LABEL: test_getexp_pd_256: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/fold-load-unops.ll b/llvm/test/CodeGen/X86/fold-load-unops.ll index 7feb66525e2..c77c6adf2e8 100644 --- a/llvm/test/CodeGen/X86/fold-load-unops.ll +++ b/llvm/test/CodeGen/X86/fold-load-unops.ll @@ -165,12 +165,14 @@ define float @sqrtss_size(float* %a) optsize{ define <4 x float> @sqrtss_full_size(<4 x float>* %a) optsize{ ; SSE-LABEL: sqrtss_full_size: ; SSE: # %bb.0: -; SSE-NEXT: sqrtss (%rdi), %xmm0 +; SSE-NEXT: movaps (%rdi), %xmm0 +; SSE-NEXT: sqrtss %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrtss_full_size: ; AVX: # %bb.0: -; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovaps (%rdi), %xmm0 +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <4 x float>, <4 x float>* %a %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld) @@ -197,12 +199,14 @@ define double @sqrtsd_size(double* %a) optsize { define <2 x double> @sqrtsd_full_size(<2 x double>* %a) optsize { ; SSE-LABEL: sqrtsd_full_size: ; SSE: # %bb.0: -; SSE-NEXT: sqrtsd (%rdi), %xmm0 +; SSE-NEXT: movapd (%rdi), %xmm0 +; SSE-NEXT: sqrtsd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: sqrtsd_full_size: ; AVX: # %bb.0: -; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovapd (%rdi), %xmm0 +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %ld = load <2 x double>, <2 x double>* %a %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld) diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 16dedb74366..a660293d6f7 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -2075,10 +2075,10 @@ define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vsqrtps %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} - %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) + %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) ret <4 x float> %res } -declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { ; SSE-LABEL: test_mm_sqrt_ss: @@ -2090,10 +2090,12 @@ define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} - %sqrt = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) - ret <4 x float> %sqrt + %ext = extractelement <4 x float> %a0, i32 0 + %sqrt = call float @llvm.sqrt.f32(float %ext) + %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 + ret <4 x float> %ins } -declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone +declare float @llvm.sqrt.f32(float) nounwind readnone define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { ; X86-SSE-LABEL: test_mm_store_ps: diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll index 60a455ae148..d153d2f42d6 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86-upgrade.ll @@ -6,6 +6,44 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 + +define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { +; SSE-LABEL: test_x86_sse_sqrt_ps: +; SSE: ## %bb.0: +; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse_sqrt_ps: +; AVX1: ## %bb.0: +; AVX1-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse_sqrt_ps: +; AVX512: ## %bb.0: +; AVX512-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { +; SSE-LABEL: test_x86_sse_sqrt_ss: +; SSE: ## %bb.0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX-LABEL: test_x86_sse_sqrt_ss: +; AVX: ## %bb.0: +; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone + + define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { ; X86-SSE-LABEL: test_x86_sse_storeu_ps: ; X86-SSE: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll index 0014da6b2ec..63ccda1d66b 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -448,48 +448,6 @@ define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone -define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { -; SSE-LABEL: test_x86_sse_sqrt_ps: -; SSE: ## %bb.0: -; SSE-NEXT: sqrtps %xmm0, %xmm0 ## encoding: [0x0f,0x51,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse_sqrt_ps: -; AVX1: ## %bb.0: -; AVX1-NEXT: vsqrtps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x51,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse_sqrt_ps: -; AVX512: ## %bb.0: -; AVX512-NEXT: vsqrtps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone - - -define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { -; SSE-LABEL: test_x86_sse_sqrt_ss: -; SSE: ## %bb.0: -; SSE-NEXT: sqrtss %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x51,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse_sqrt_ss: -; AVX1: ## %bb.0: -; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x51,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse_sqrt_ss: -; AVX512: ## %bb.0: -; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone - - define void @test_x86_sse_stmxcsr(i8* %a0) { ; X86-SSE-LABEL: test_x86_sse_stmxcsr: ; X86-SSE: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll index 1a294daf1ea..39bbb8f6538 100644 --- a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -83,26 +83,22 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { define <4 x float> @test_sqrt_ss(<4 x float> %a) { ; SSE2-LABEL: test_sqrt_ss: ; SSE2: # %bb.0: -; SSE2-NEXT: sqrtss %xmm0, %xmm1 -; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: sqrtss %xmm0, %xmm0 ; SSE2-NEXT: ret{{[l|q]}} ; ; SSE41-LABEL: test_sqrt_ss: ; SSE41: # %bb.0: -; SSE41-NEXT: sqrtss %xmm0, %xmm1 -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: sqrtss %xmm0, %xmm0 ; SSE41-NEXT: ret{{[l|q]}} ; ; AVX1-LABEL: test_sqrt_ss: ; AVX1: # %bb.0: -; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm1 -; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: ret{{[l|q]}} ; ; AVX512-LABEL: test_sqrt_ss: ; AVX512: # %bb.0: -; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: ret{{[l|q]}} %1 = extractelement <4 x float> %a, i32 0 %2 = call float @llvm.sqrt.f32(float %1) @@ -182,26 +178,22 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { define <2 x double> @test_sqrt_sd(<2 x double> %a) { ; SSE2-LABEL: test_sqrt_sd: ; SSE2: # %bb.0: -; SSE2-NEXT: sqrtsd %xmm0, %xmm1 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: sqrtsd %xmm0, %xmm0 ; SSE2-NEXT: ret{{[l|q]}} ; ; SSE41-LABEL: test_sqrt_sd: ; SSE41: # %bb.0: -; SSE41-NEXT: sqrtsd %xmm0, %xmm1 -; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: sqrtsd %xmm0, %xmm0 ; SSE41-NEXT: ret{{[l|q]}} ; ; AVX1-LABEL: test_sqrt_sd: ; AVX1: # %bb.0: -; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1 -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: ret{{[l|q]}} ; ; AVX512-LABEL: test_sqrt_sd: ; AVX512: # %bb.0: -; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: ret{{[l|q]}} %1 = extractelement <2 x double> %a, i32 0 %2 = call double @llvm.sqrt.f64(double %1) diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index abc45b02d0d..31046e94c34 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -3720,10 +3720,10 @@ define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { ; AVX: # %bb.0: ; AVX-NEXT: vsqrtpd %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} - %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) + %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) ret <2 x double> %res } -declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_sqrt_sd: @@ -3736,14 +3736,12 @@ define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwin ; AVX: # %bb.0: ; AVX-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: ret{{[l|q]}} - %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) - %ext0 = extractelement <2 x double> %call, i32 0 - %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 - %ext1 = extractelement <2 x double> %a1, i32 1 - %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 - ret <2 x double> %ins1 + %ext = extractelement <2 x double> %a0, i32 0 + %sqrt = call double @llvm.sqrt.f64(double %ext) + %ins = insertelement <2 x double> %a1, double %sqrt, i32 0 + ret <2 x double> %ins } -declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_sra_epi16: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 91b19fc4fc4..f29b474ea0b 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -6,6 +6,89 @@ ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 + +define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { +; SSE-LABEL: test_x86_sse2_sqrt_pd: +; SSE: ## %bb.0: +; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse2_sqrt_pd: +; AVX1: ## %bb.0: +; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_sqrt_pd: +; AVX512: ## %bb.0: +; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { +; SSE-LABEL: test_x86_sse2_sqrt_sd: +; SSE: ## %bb.0: +; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX-LABEL: test_x86_sse2_sqrt_sd: +; AVX: ## %bb.0: +; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) { +; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X86-SSE: ## %bb.0: +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00] +; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] +; X86-SSE-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X86-AVX1: ## %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00] +; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] +; X86-AVX1-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00] +; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] +; X86-AVX512-NEXT: retl ## encoding: [0xc3] +; +; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X64-SSE: ## %bb.0: +; X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07] +; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] +; X64-SSE-NEXT: retq ## encoding: [0xc3] +; +; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X64-AVX1: ## %bb.0: +; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07] +; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] +; X64-AVX1-NEXT: retq ## encoding: [0xc3] +; +; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07] +; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] +; X64-AVX512-NEXT: retq ## encoding: [0xc3] + %a1 = load <2 x double>, <2 x double>* %a0, align 16 + %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { ; SSE-LABEL: test_x86_sse2_psll_dq_bs: ; SSE: ## %bb.0: @@ -241,8 +324,8 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X86-SSE: ## %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] ; X86-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9] -; X86-SSE-NEXT: movhpd LCPI8_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A] -; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4 +; X86-SSE-NEXT: movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A] +; X86-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 ; X86-SSE-NEXT: ## xmm1 = xmm1[0],mem[0] ; X86-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8] ; X86-SSE-NEXT: movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08] @@ -252,8 +335,8 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] ; X86-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9] -; X86-AVX1-NEXT: vmovhpd LCPI8_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] -; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4 +; X86-AVX1-NEXT: vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] +; X86-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 ; X86-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0] ; X86-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1] ; X86-AVX1-NEXT: vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00] @@ -262,8 +345,8 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X86-AVX512-LABEL: test_x86_sse2_storeu_pd: ; X86-AVX512: ## %bb.0: ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovsd LCPI8_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A] -; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI8_0, kind: FK_Data_4 +; X86-AVX512-NEXT: vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A] +; X86-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4 ; X86-AVX512-NEXT: ## xmm1 = mem[0],zero ; X86-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08] ; X86-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] @@ -275,7 +358,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X64-SSE: ## %bb.0: ; X64-SSE-NEXT: xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9] ; X64-SSE-NEXT: movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A] -; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte +; X64-SSE-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte ; X64-SSE-NEXT: ## xmm1 = xmm1[0],mem[0] ; X64-SSE-NEXT: addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8] ; X64-SSE-NEXT: movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f] @@ -285,7 +368,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9] ; X64-AVX1-NEXT: vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A] -; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte +; X64-AVX1-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte ; X64-AVX1-NEXT: ## xmm1 = xmm1[0],mem[0] ; X64-AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1] ; X64-AVX1-NEXT: vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07] @@ -294,7 +377,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; X64-AVX512-LABEL: test_x86_sse2_storeu_pd: ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A] -; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI8_0-4, kind: reloc_riprel_4byte +; X64-AVX512-NEXT: ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte ; X64-AVX512-NEXT: ## xmm1 = mem[0],zero ; X64-AVX512-NEXT: vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08] ; X64-AVX512-NEXT: ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 337e3ac4ab6..5e7b351ad13 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1607,93 +1607,6 @@ define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone -define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { -; SSE-LABEL: test_x86_sse2_sqrt_pd: -; SSE: ## %bb.0: -; SSE-NEXT: sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_sqrt_pd: -; AVX1: ## %bb.0: -; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_sqrt_pd: -; AVX512: ## %bb.0: -; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone - - -define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { -; SSE-LABEL: test_x86_sse2_sqrt_sd: -; SSE: ## %bb.0: -; SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_sqrt_sd: -; AVX1: ## %bb.0: -; AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_sqrt_sd: -; AVX512: ## %bb.0: -; AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone - - -define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) { -; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X86-SSE: ## %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00] -; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] -; X86-SSE-NEXT: retl ## encoding: [0xc3] -; -; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X86-AVX1: ## %bb.0: -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX1-NEXT: vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00] -; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] -; X86-AVX1-NEXT: retl ## encoding: [0xc3] -; -; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X86-AVX512: ## %bb.0: -; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00] -; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] -; X86-AVX512-NEXT: retl ## encoding: [0xc3] -; -; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X64-SSE: ## %bb.0: -; X64-SSE-NEXT: movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07] -; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0] -; X64-SSE-NEXT: retq ## encoding: [0xc3] -; -; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X64-AVX1: ## %bb.0: -; X64-AVX1-NEXT: vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07] -; X64-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0] -; X64-AVX1-NEXT: retq ## encoding: [0xc3] -; -; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load: -; X64-AVX512: ## %bb.0: -; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07] -; X64-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] -; X64-AVX512-NEXT: retq ## encoding: [0xc3] - %a1 = load <2 x double>, <2 x double>* %a0, align 16 - %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1] - ret <2 x double> %res -} - - define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomieq_sd: ; SSE: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/sse_partial_update.ll b/llvm/test/CodeGen/X86/sse_partial_update.ll index d6929930844..db575d62380 100644 --- a/llvm/test/CodeGen/X86/sse_partial_update.ll +++ b/llvm/test/CodeGen/X86/sse_partial_update.ll @@ -54,9 +54,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone define void @sqrtss(<4 x float> %a) nounwind uwtable ssp { ; CHECK-LABEL: sqrtss: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: sqrtss %xmm0, %xmm0 -; CHECK-NEXT: cvtss2sd %xmm0, %xmm2 +; CHECK-NEXT: sqrtss %xmm0, %xmm1 +; CHECK-NEXT: cvtss2sd %xmm1, %xmm2 ; CHECK-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtss2sd %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: jmp _callee ## TAILCALL @@ -75,9 +76,10 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp { ; CHECK-LABEL: sqrtsd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: sqrtsd %xmm0, %xmm0 -; CHECK-NEXT: cvtsd2ss %xmm0, %xmm2 +; CHECK-NEXT: sqrtsd %xmm0, %xmm1 +; CHECK-NEXT: cvtsd2ss %xmm1, %xmm2 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: jmp _callee2 ## TAILCALL |

