; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c

define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_add_ps:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_add_ss:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_and_ps:
; AVX: # %bb.0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_ps:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_andnot_ps:
; AVX: # %bb.0:
; AVX-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm0, %xmm1
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpge_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpless %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpge_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpless %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm0, %xmm1
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpgt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltss %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpgt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm0, %xmm1
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpnge_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnless %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpnge_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnless %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm0, %xmm1
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpngt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpngt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpordps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpordss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2d %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0
; SSE-NEXT: setnp %al
; SSE-NEXT: sete %cl
; SSE-NEXT: andb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comieq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcomiss %xmm1, %xmm0
; AVX-NEXT: setnp %al
; AVX-NEXT: sete %cl
; AVX-NEXT: andb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comiss %xmm1, %xmm0
; SSE-NEXT: setae %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comige_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vcomiss %xmm1, %xmm0
; AVX-NEXT: setae %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comiss %xmm1, %xmm0
; SSE-NEXT: seta %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comigt_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vcomiss %xmm1, %xmm0
; AVX-NEXT: seta %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comiss %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comile_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: setae %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: comiss %xmm0, %xmm1
; SSE-NEXT: seta %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comilt_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vcomiss %xmm0, %xmm1
; AVX-NEXT: seta %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0
; SSE-NEXT: setp %al
; SSE-NEXT: setne %cl
; SSE-NEXT: orb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_comineq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcomiss %xmm1, %xmm0
; AVX-NEXT: setp %al
; AVX-NEXT: setne %cl
; AVX-NEXT: orb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvt_ss2si:
; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_cvtsi32_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2ssl %edi, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_cvtsi32_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movss %xmm0, (%esp)
; X86-SSE-NEXT: flds (%esp)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_cvtss_f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-LABEL: test_mm_cvtss_f32:
; X64: # %bb.0:
; X64-NEXT: retq
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtss_si32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}

define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvttss_si:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvttss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvttss_si32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}

define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ps:
; SSE: # %bb.0:
; SSE-NEXT: divps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_div_ps:
; AVX: # %bb.0:
; AVX-NEXT: vdivps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = fdiv <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_div_ss:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_div_ss:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fdiv = fdiv float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fdiv, i32 0
  ret <4 x float> %res
}

define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl %esp, %eax
; X86-SSE-NEXT: stmxcsr (%eax)
; X86-SSE-NEXT: movl (%esp), %eax
; X86-SSE-NEXT: andl $8064, %eax # imm = 0x1F80
; X86-SSE-NEXT: popl %ecx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl %esp, %eax
; X86-AVX-NEXT: vstmxcsr (%eax)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: andl $8064, %eax # imm = 0x1F80
; X86-AVX-NEXT: popl %ecx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: andl $8064, %eax # imm = 0x1F80
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-AVX-NEXT: andl $8064, %eax # imm = 0x1F80
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 8064
  ret i32 %4
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone

define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl %esp, %eax
; X86-SSE-NEXT: stmxcsr (%eax)
; X86-SSE-NEXT: movl (%esp), %eax
; X86-SSE-NEXT: andl $63, %eax
; X86-SSE-NEXT: popl %ecx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl %esp, %eax
; X86-AVX-NEXT: vstmxcsr (%eax)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: andl $63, %eax
; X86-AVX-NEXT: popl %ecx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: andl $63, %eax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-AVX-NEXT: andl $63, %eax
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 63
  ret i32 %4
}

define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl %esp, %eax
; X86-SSE-NEXT: stmxcsr (%eax)
; X86-SSE-NEXT: movl (%esp), %eax
; X86-SSE-NEXT: andl $32768, %eax # imm = 0x8000
; X86-SSE-NEXT: popl %ecx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl %esp, %eax
; X86-AVX-NEXT: vstmxcsr (%eax)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: andl $32768, %eax # imm = 0x8000
; X86-AVX-NEXT: popl %ecx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: andl $32768, %eax # imm = 0x8000
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-AVX-NEXT: andl $32768, %eax # imm = 0x8000
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 32768
  ret i32 %4
}

define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl %esp, %eax
; X86-SSE-NEXT: stmxcsr (%eax)
; X86-SSE-NEXT: movl (%esp), %eax
; X86-SSE-NEXT: andl $24576, %eax # imm = 0x6000
; X86-SSE-NEXT: popl %ecx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl %esp, %eax
; X86-AVX-NEXT: vstmxcsr (%eax)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: andl $24576, %eax # imm = 0x6000
; X86-AVX-NEXT: popl %ecx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: andl $24576, %eax # imm = 0x6000
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-AVX-NEXT: andl $24576, %eax # imm = 0x6000
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 24576
  ret i32 %4
}

define i32 @test_mm_getcsr() nounwind {
; X86-SSE-LABEL: test_mm_getcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl %esp, %eax
; X86-SSE-NEXT: stmxcsr (%eax)
; X86-SSE-NEXT: movl (%esp), %eax
; X86-SSE-NEXT: popl %ecx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_getcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl %esp, %eax
; X86-AVX-NEXT: vstmxcsr (%eax)
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: popl %ecx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_getcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_getcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}

define <4 x float> @test_mm_load_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_load_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps (%eax), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_load_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_load_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 16
  ret <4 x float> %res
}

define <4 x float> @test_mm_load_ps1(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_load_ps1:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vbroadcastss (%eax), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_load_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_load_ps1:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastss (%rdi), %xmm0
; X64-AVX-NEXT: retq
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load_ss(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_load_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_load_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_load_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: retq
  %ld = load float, float* %a0, align 1
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load1_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_load1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_load1_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vbroadcastss (%eax), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_load1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_load1_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastss (%rdi), %xmm0
; X64-AVX-NEXT: retq
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadh_pi:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_loadh_pi:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_loadh_pi:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrq $32, %rax
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_loadh_pi:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-AVX-NEXT: retq
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
; X86-SSE-LABEL: test_mm_loadl_pi:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE-NEXT: movaps %xmm1, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_loadl_pi:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_loadl_pi:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_loadl_pi:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrq $32, %rax
; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_loadl_pi:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; X64-AVX-NEXT: retq
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_loadr_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_loadr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_loadr_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %arg0, align 16
  %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadu_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups (%eax), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_loadu_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovups (%eax), %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_loadu_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups (%rdi), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_loadu_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups (%rdi), %xmm0
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 1
  ret <4 x float> %res
}

define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_max_ps:
; SSE: # %bb.0:
; SSE-NEXT: maxps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_max_ss:
; SSE: # %bb.0:
; SSE-NEXT: maxss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_min_ps:
; SSE: # %bb.0:
; SSE-NEXT: minps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_ps:
; AVX: # %bb.0:
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_min_ss:
; SSE: # %bb.0:
; SSE-NEXT: minss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_ss:
; AVX: # %bb.0:
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_move_ss:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_move_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_move_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_movehl_ps:
; SSE: # %bb.0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_movehl_ps:
; AVX: # %bb.0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_movelh_ps:
; SSE: # %bb.0:
; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_movelh_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_movemask_ps:
; SSE: # %bb.0:
; SSE-NEXT: movmskps %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_movemask_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmovmskps %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ps:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mul_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = fmul <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_mul_ss:
; SSE: # %bb.0:
; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mul_ss:
; AVX: # %bb.0:
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fmul = fmul float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fmul, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_or_ps:
; SSE: # %bb.0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_or_ps:
; AVX: # %bb.0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define void @test_mm_prefetch(i8* %a0) {
; X86-LABEL: test_mm_prefetch:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: prefetchnta (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test_mm_prefetch:
; X64: # %bb.0:
; X64-NEXT: prefetchnta (%rdi)
; X64-NEXT: retq
  call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone

define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ps:
; SSE: # %bb.0:
; SSE-NEXT: rcpps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_rcp_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrcpps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rcp_ss:
; SSE: # %bb.0:
; SSE-NEXT: rcpss %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_rcp_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %rcp
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_rsqrt_ps:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_rsqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: rsqrtss %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_rsqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %rsqrt
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %esp, %ecx
; X86-SSE-NEXT: stmxcsr (%ecx)
; X86-SSE-NEXT: movl (%esp), %edx
; X86-SSE-NEXT: andl $-8065, %edx # imm = 0xE07F
; X86-SSE-NEXT: orl %eax, %edx
; X86-SSE-NEXT: movl %edx, (%esp)
; X86-SSE-NEXT: ldmxcsr (%ecx)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %esp, %ecx
; X86-AVX-NEXT: vstmxcsr (%ecx)
; X86-AVX-NEXT: movl (%esp), %edx
; X86-AVX-NEXT: andl $-8065, %edx # imm = 0xE07F
; X86-AVX-NEXT: orl %eax, %edx
; X86-AVX-NEXT: movl %edx, (%esp)
; X86-AVX-NEXT: vldmxcsr (%ecx)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: andl $-8065, %ecx # imm = 0xE07F
; X64-SSE-NEXT: orl %edi, %ecx
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: ldmxcsr (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-AVX-NEXT: andl $-8065, %ecx # imm = 0xE07F
; X64-AVX-NEXT: orl %edi, %ecx
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vldmxcsr (%rax)
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -8065
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone

define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %esp, %ecx
; X86-SSE-NEXT: stmxcsr (%ecx)
; X86-SSE-NEXT: movl (%esp), %edx
; X86-SSE-NEXT: andl $-64, %edx
; X86-SSE-NEXT: orl %eax, %edx
; X86-SSE-NEXT: movl %edx, (%esp)
; X86-SSE-NEXT: ldmxcsr (%ecx)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %esp, %ecx
; X86-AVX-NEXT: vstmxcsr (%ecx)
; X86-AVX-NEXT: movl (%esp), %edx
; X86-AVX-NEXT: andl $-64, %edx
; X86-AVX-NEXT: orl %eax, %edx
; X86-AVX-NEXT: movl %edx, (%esp)
; X86-AVX-NEXT: vldmxcsr (%ecx)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: andl $-64, %ecx
; X64-SSE-NEXT: orl %edi, %ecx
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: ldmxcsr (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-AVX-NEXT: andl $-64, %ecx
; X64-AVX-NEXT: orl %edi, %ecx
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vldmxcsr (%rax)
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -64
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %esp, %ecx
; X86-SSE-NEXT: stmxcsr (%ecx)
; X86-SSE-NEXT: movl (%esp), %edx
; X86-SSE-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF
; X86-SSE-NEXT: orl %eax, %edx
; X86-SSE-NEXT: movl %edx, (%esp)
; X86-SSE-NEXT: ldmxcsr (%ecx)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %esp, %ecx
; X86-AVX-NEXT: vstmxcsr (%ecx)
; X86-AVX-NEXT: movl (%esp), %edx
; X86-AVX-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF
; X86-AVX-NEXT: orl %eax, %edx
; X86-AVX-NEXT: movl %edx, (%esp)
; X86-AVX-NEXT: vldmxcsr (%ecx)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: andl $-32769, %ecx # imm = 0xFFFF7FFF
; X64-SSE-NEXT: orl %edi, %ecx
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: ldmxcsr (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-AVX-NEXT: andl $-32769, %ecx # imm = 0xFFFF7FFF
; X64-AVX-NEXT: orl %edi, %ecx
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vldmxcsr (%rax)
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -32769
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_set_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_set_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_set_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; X64-SSE-NEXT: movaps %xmm3, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_set_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; X64-AVX-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a3, i32 0
  %res1 = insertelement <4 x float> %res0, float %a2, i32 1
  %res2 = insertelement <4 x float> %res1, float %a1, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_set_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_set_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_set_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test_mm_set_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test_mm_set_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X64-AVX512-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %esp, %ecx
; X86-SSE-NEXT: stmxcsr (%ecx)
; X86-SSE-NEXT: movl (%esp), %edx
; X86-SSE-NEXT: andl $-24577, %edx # imm = 0x9FFF
; X86-SSE-NEXT: orl %eax, %edx
; X86-SSE-NEXT: movl %edx, (%esp)
; X86-SSE-NEXT: ldmxcsr (%ecx)
; X86-SSE-NEXT: popl %eax
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl %esp, %ecx
; X86-AVX-NEXT: vstmxcsr (%ecx)
; X86-AVX-NEXT: movl (%esp), %edx
; X86-AVX-NEXT: andl $-24577, %edx # imm = 0x9FFF
; X86-AVX-NEXT: orl %eax, %edx
; X86-AVX-NEXT: movl %edx, (%esp)
; X86-AVX-NEXT: vldmxcsr (%ecx)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: stmxcsr (%rax)
; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: andl $-24577, %ecx # imm = 0x9FFF
; X64-SSE-NEXT: orl %edi, %ecx
; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: ldmxcsr (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vstmxcsr (%rax)
; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-AVX-NEXT: andl $-24577, %ecx # imm = 0x9FFF
; X64-AVX-NEXT: orl %edi, %ecx
; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vldmxcsr (%rax)
; X64-AVX-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -24577
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ss(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorps %xmm0, %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_set_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_set_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_set_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps %xmm1, %xmm1
; X64-SSE-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; X64-SSE-NEXT: movaps %xmm1, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test_mm_set_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test_mm_set_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-AVX512-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X86-SSE-LABEL: test_mm_set1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_set1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_set1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_set1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test_mm_set1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test_mm_set1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X64-AVX512-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_mm_setcsr(i32 %a0) nounwind {
define void @test_mm_setcsr(i32 %a0) nounwind {
; X86-SSE-LABEL: test_mm_setcsr:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ldmxcsr (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_setcsr:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vldmxcsr (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_setcsr:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: ldmxcsr (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_setcsr:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-AVX-NEXT: vldmxcsr (%rax)
; X64-AVX-NEXT: retq
  %st = alloca i32, align 4
  store i32 %a0, i32* %st, align 4
  %bc = bitcast i32* %st to i8*
  call void @llvm.x86.sse.ldmxcsr(i8* %bc)
  ret void
}

define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X86-SSE-LABEL: test_mm_setr_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_setr_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_setr_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_setr_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; X64-AVX-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a1, i32 1
  %res2 = insertelement <4 x float> %res1, float %a2, i32 2
  %res3 = insertelement <4 x float> %res2, float %a3, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_setzero_ps() {
; SSE-LABEL: test_mm_setzero_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_setzero_ps:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  ret <4 x float> zeroinitializer
}

define void @test_mm_sfence() nounwind {
; CHECK-LABEL: test_mm_sfence:
; CHECK: # %bb.0:
; CHECK-NEXT: sfence
; CHECK-NEXT: ret{{[l|q]}}
  call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind readnone
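
; _mm_shuffle_ps fills the two low result lanes from the first operand and the
; two high lanes from the second, as selected by an 8-bit immediate; here the
; immediate is 0, so lane 0 of each operand is replicated into its half.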
define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_shuffle_ps:
; SSE: # %bb.0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_shuffle_ps:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x float> %res
}

define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ps:
; SSE: # %bb.0:
; SSE-NEXT: sqrtps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sqrt_ps:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone

define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
; SSE-LABEL: test_mm_sqrt_ss:
; SSE: # %bb.0:
; SSE-NEXT: sqrtss %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sqrt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %ext = extractelement <4 x float> %a0, i32 0
  %sqrt = call float @llvm.sqrt.f32(float %ext)
  %ins = insertelement <4 x float> %a0, float %sqrt, i32 0
  ret <4 x float> %ins
}
declare float @llvm.sqrt.f32(float) nounwind readnone

define void @test_mm_store_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_store_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_store_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_store_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  store <4 x float> %a1, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ps1:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_store_ps1:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_store_ps1:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_store_ps1:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test_mm_store_ps1:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test_mm_store_ps1:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX512-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}
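
; _mm_store_ss writes only the low lane; the store is align 1 because the
; intrinsic imposes no alignment requirement on the destination.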
define void @test_mm_store_ss(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movss %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_store_ss:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_store_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_store_ss:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %ext = extractelement <4 x float> %a1, i32 0
  store float %ext, float* %a0, align 1
  ret void
}

define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_store1_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: test_mm_store1_ps:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-AVX1-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX1-NEXT: retl
;
; X86-AVX512-LABEL: test_mm_store1_ps:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X86-AVX512-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: test_mm_store1_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: test_mm_store1_ps:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX1-NEXT: retq
;
; X64-AVX512-LABEL: test_mm_store1_ps:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0
; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX512-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storeh_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $32, %esp
; X86-SSE-NEXT: movl 8(%ebp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%esp)
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl %edx, 4(%eax)
; X86-SSE-NEXT: movl %ecx, (%eax)
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_storeh_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovhpd %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_storeh_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: movq %rax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_storeh_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
; X64-AVX-NEXT: movq %rax, (%rdi)
; X64-AVX-NEXT: retq
  %ptr = bitcast x86_mmx* %a0 to i64*
  %bc = bitcast <4 x float> %a1 to <2 x i64>
  %ext = extractelement <2 x i64> %bc, i32 1
  store i64 %ext, i64* %ptr
  ret void
}
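
; Like test_mm_storeh_ps above, this stores one 64-bit half of the vector:
; without SSE2 the value goes through an aligned stack temporary and 32-bit
; GPR moves, while AVX can use a single vector move or extract.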
define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: test_mm_storel_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $32, %esp
; X86-SSE-NEXT: movl 8(%ebp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%esp)
; X86-SSE-NEXT: movl (%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl %edx, 4(%eax)
; X86-SSE-NEXT: movl %ecx, (%eax)
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_storel_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_storel_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSE-NEXT: movq %rax, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_storel_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovq %xmm0, %rax
; X64-AVX-NEXT: movq %rax, (%rdi)
; X64-AVX-NEXT: retq
  %ptr = bitcast x86_mmx* %a0 to i64*
  %bc = bitcast <4 x float> %a1 to <2 x i64>
  %ext = extractelement <2 x i64> %bc, i32 0
  store i64 %ext, i64* %ptr
  ret void
}

define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storer_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_storer_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_storer_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_storer_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_storeu_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_storeu_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovups %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_storeu_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_storeu_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  store <4 x float> %a1, <4 x float>* %arg0, align 1
  ret void
}

define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) {
; X86-SSE-LABEL: test_mm_stream_ps:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movntps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_stream_ps:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovntps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_stream_ps:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movntps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_stream_ps:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovntps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0
  ret void
}
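
; As with the add tests earlier in the file, the packed subtract operates on
; all four lanes, while the scalar form below only updates lane 0 of %a0.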
define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ps:
; SSE: # %bb.0:
; SSE-NEXT: subps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sub_ps:
; AVX: # %bb.0:
; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = fsub <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_sub_ss:
; SSE: # %bb.0:
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_sub_ss:
; AVX: # %bb.0:
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fsub = fsub float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fsub, i32 0
  ret <4 x float> %res
}
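
; _MM_TRANSPOSE4_PS transposes a 4x4 matrix held in four vectors:
; unpacklo/unpackhi pairs interleave the rows, then movelh/movehl pairs
; recombine the halves, eight shuffles in total.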
define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind {
; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movaps (%esi), %xmm0
; X86-SSE-NEXT: movaps (%edx), %xmm1
; X86-SSE-NEXT: movaps (%ecx), %xmm2
; X86-SSE-NEXT: movaps (%eax), %xmm3
; X86-SSE-NEXT: movaps %xmm0, %xmm4
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X86-SSE-NEXT: movaps %xmm2, %xmm5
; X86-SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-SSE-NEXT: movaps %xmm4, %xmm1
; X86-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X86-SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm4[1],xmm5[1]
; X86-SSE-NEXT: movaps %xmm0, %xmm3
; X86-SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X86-SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; X86-SSE-NEXT: movaps %xmm1, (%esi)
; X86-SSE-NEXT: movaps %xmm5, (%edx)
; X86-SSE-NEXT: movaps %xmm3, (%ecx)
; X86-SSE-NEXT: movaps %xmm2, (%eax)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_MM_TRANSPOSE4_PS:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT: vmovaps (%esi), %xmm0
; X86-AVX-NEXT: vmovaps (%edx), %xmm1
; X86-AVX-NEXT: vmovaps (%ecx), %xmm2
; X86-AVX-NEXT: vmovaps (%eax), %xmm3
; X86-AVX-NEXT: vunpcklps {{.*#+}} xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-AVX-NEXT: vunpcklps {{.*#+}} xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-AVX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm4[0],xmm5[0]
; X86-AVX-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm4[1],xmm5[1]
; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm4 = xmm0[0],xmm1[0]
; X86-AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X86-AVX-NEXT: vmovaps %xmm2, (%esi)
; X86-AVX-NEXT: vmovaps %xmm3, (%edx)
; X86-AVX-NEXT: vmovaps %xmm4, (%ecx)
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: movaps (%rsi), %xmm1
; X64-SSE-NEXT: movaps (%rdx), %xmm2
; X64-SSE-NEXT: movaps (%rcx), %xmm3
; X64-SSE-NEXT: movaps %xmm0, %xmm4
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X64-SSE-NEXT: movaps %xmm2, %xmm5
; X64-SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X64-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-SSE-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-SSE-NEXT: movaps %xmm4, %xmm1
; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; X64-SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm4[1],xmm5[1]
; X64-SSE-NEXT: movaps %xmm0, %xmm3
; X64-SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X64-SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; X64-SSE-NEXT: movaps %xmm1, (%rdi)
; X64-SSE-NEXT: movaps %xmm5, (%rsi)
; X64-SSE-NEXT: movaps %xmm3, (%rdx)
; X64-SSE-NEXT: movaps %xmm2, (%rcx)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_MM_TRANSPOSE4_PS:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vmovaps (%rsi), %xmm1
; X64-AVX-NEXT: vmovaps (%rdx), %xmm2
; X64-AVX-NEXT: vmovaps (%rcx), %xmm3
; X64-AVX-NEXT: vunpcklps {{.*#+}} xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-AVX-NEXT: vunpcklps {{.*#+}} xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-AVX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm4[0],xmm5[0]
; X64-AVX-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm4[1],xmm5[1]
; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm4 = xmm0[0],xmm1[0]
; X64-AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-AVX-NEXT: vmovaps %xmm2, (%rdi)
; X64-AVX-NEXT: vmovaps %xmm3, (%rsi)
; X64-AVX-NEXT: vmovaps %xmm4, (%rdx)
; X64-AVX-NEXT: vmovaps %xmm0, (%rcx)
; X64-AVX-NEXT: retq
  %row0 = load <4 x float>, <4 x float>* %a0, align 16
  %row1 = load <4 x float>, <4 x float>* %a1, align 16
  %row2 = load <4 x float>, <4 x float>* %a2, align 16
  %row3 = load <4 x float>, <4 x float>* %a3, align 16
  %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  store <4 x float> %res0, <4 x float>* %a0, align 16
  store <4 x float> %res1, <4 x float>* %a1, align 16
  store <4 x float> %res2, <4 x float>* %a2, align 16
  store <4 x float> %res3, <4 x float>* %a3, align 16
  ret void
}

define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: setnp %al
; SSE-NEXT: sete %cl
; SSE-NEXT: andb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomieq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: setnp %al
; AVX-NEXT: sete %cl
; AVX-NEXT: andb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
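
; The ge/gt/le/lt forms lower to a single ucomiss + setcc; the le/lt tests
; swap the operands so setae/seta can test the carry flag directly. Only the
; eq/neq forms (test_mm_ucomieq_ss above, test_mm_ucomineq_ss below) need the
; extra parity-flag logic to give NaN operands unordered semantics.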
define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: setae %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomige_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: setae %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: seta %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomigt_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: seta %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomiss %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomile_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vucomiss %xmm0, %xmm1
; AVX-NEXT: setae %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomiss %xmm0, %xmm1
; SSE-NEXT: seta %al
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomilt_ss:
; AVX: # %bb.0:
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: vucomiss %xmm0, %xmm1
; AVX-NEXT: seta %al
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: setp %al
; SSE-NEXT: setne %cl
; SSE-NEXT: orb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ucomineq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: setp %al
; AVX-NEXT: setne %cl
; AVX-NEXT: orb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: ret{{[l|q]}}
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_undefined_ps() {
; CHECK-LABEL: test_mm_undefined_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
  ret <4 x float> undef
}

define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpackhi_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_unpackhi_ps:
; AVX: # %bb.0:
; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ret <4 x float> %res
}

define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_unpacklo_ps:
; SSE: # %bb.0:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_unpacklo_ps:
; AVX: # %bb.0:
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x float> %res
}
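
; The logical op is expressed on <4 x i32> in IR (LLVM IR has no
; floating-point xor), but it still selects to the float-domain xorps/vxorps.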
define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_xor_ps:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_xor_ps:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

!0 = !{i32 1}