; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; DemandedBits - MOVMSK zeros the upper bits of the result.
;

define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, 3
  ret i32 %2
}

define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, 65535
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, 15
  ret i32 %2
}

; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

;
; DemandedBits - If we don't use the lower bits then we just return zero.
;

define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  %2 = and i32 %1, -4
  ret i32 %2
}

define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  %2 = and i32 %1, -65536
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  %2 = and i32 %1, -256
  ret i32 %2
}

define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  %2 = and i32 %1, -16
  ret i32 %2
}

; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)
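
; Illustrative sketches (hand-written, not autogenerated; test names are
; placeholders, not from the original suite): because llvm.x86.avx2.pmovmskb
; produces one mask bit per byte of <32 x i8>, all 32 result bits may be set,
; so there is no demanded-bits simplification specific to the intrinsic. The
; only analogous masks are the trivial ones: -1, which the generic
; 'and X, -1 -> X' fold removes, and 0, which folds to a constant zero
; (after which the unused IntrNoMem call is erased as dead).

define i32 @test_upper_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_avx2_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
; CHECK-NEXT:    ret i32 [[TMP1]]
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
  %2 = and i32 %1, -1
  ret i32 %2
}

define i32 @test_lower_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_avx2_pmovmskb(
; CHECK-NEXT:    ret i32 0
;
  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
  %2 = and i32 %1, 0
  ret i32 %2
}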