Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  22
-rw-r--r--  llvm/test/Transforms/InstCombine/x86-movmsk.ll                    37
2 files changed, 32 insertions, 27 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 3930cc010a6..3c0a28c4b7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -768,6 +768,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
+ case Intrinsic::x86_sse_movmsk_ps:
+ case Intrinsic::x86_sse2_movmsk_pd:
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx_movmsk_ps_256:
+ case Intrinsic::x86_avx_movmsk_pd_256:
+ case Intrinsic::x86_avx2_pmovmskb: {
+ // MOVMSK copies the vector elements' sign bits to the low bits
+ // and zeros the high bits.
+ auto Arg = II->getArgOperand(0);
+ auto ArgType = cast<VectorType>(Arg->getType());
+ unsigned ArgWidth = ArgType->getNumElements();
+
+ // If we don't need any of the low bits then return zero;
+ // we know that DemandedMask is already non-zero.
+ APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
+ if (DemandedElts == 0)
+ return ConstantInt::getNullValue(VTy);
+
+ // We know that the upper bits are set to zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth);
+ return nullptr;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return nullptr;
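
For reference, the demanded-bits reasoning the new cases add can be modeled outside of LLVM's APInt machinery. Below is a minimal standalone sketch (plain uint32_t instead of APInt, and a hypothetical movmskDemandedBits helper), not the actual InstCombine code:

// Standalone model of the MOVMSK demanded-bits handling added above.
// A MOVMSK of an N-element vector copies the N sign bits into the low N bits
// of an i32 result and zeros bits N..31.
#include <cassert>
#include <cstdint>

struct MovmskDemandedBits {
  bool FoldsToZero;    // caller demands none of the bits the intrinsic can set
  uint32_t KnownZero;  // result bits that are always zero
};

MovmskDemandedBits movmskDemandedBits(unsigned ArgWidth, uint32_t DemandedMask) {
  uint32_t LowMask = ArgWidth >= 32 ? ~0u : (1u << ArgWidth) - 1;
  if ((DemandedMask & LowMask) == 0)
    return {true, ~0u};     // equivalent to returning a constant zero
  return {false, ~LowMask}; // bits above the element count are known zero
}

int main() {
  // <4 x float> movmsk.ps: demanding only bits 4..31 folds the call to 0.
  assert(movmskDemandedBits(4, 0xFFFFFFF0u).FoldsToZero);
  // Demanding the low bits keeps the call, but bits 4..31 are known zero,
  // so a following 'and i32 %r, 15' becomes redundant.
  assert(movmskDemandedBits(4, 0x0000000Fu).KnownZero == 0xFFFFFFF0u);
  return 0;
}

The patch expresses the same reasoning with APInt, so it applies for any result bit width and element count.
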
diff --git a/llvm/test/Transforms/InstCombine/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/x86-movmsk.ll
index 0e23218dcbc..767899432b0 100644
--- a/llvm/test/Transforms/InstCombine/x86-movmsk.ll
+++ b/llvm/test/Transforms/InstCombine/x86-movmsk.ll
@@ -5,14 +5,12 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
; DemandedBits - MOVMSK zeros the upper bits of the result.
-; TODO - we can get the and for free
;
define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 15
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
%2 = and i32 %1, 15
@@ -22,8 +20,7 @@ define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
%2 = and i32 %1, 3
@@ -33,8 +30,7 @@ define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 65535
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
%2 = and i32 %1, 65535
@@ -44,8 +40,7 @@ define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 255
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
%2 = and i32 %1, 255
@@ -55,8 +50,7 @@ define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 15
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
%2 = and i32 %1, 15
@@ -67,14 +61,11 @@ define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
;
; DemandedBits - If we don't use the lower bits then we just return zero.
-; TODO - just return zero
;
define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -16
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
%2 = and i32 %1, -16
@@ -83,9 +74,7 @@ define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -4
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
%2 = and i32 %1, -4
@@ -94,9 +83,7 @@ define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -65536
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
%2 = and i32 %1, -65536
@@ -105,9 +92,7 @@ define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -256
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
%2 = and i32 %1, -256
@@ -116,9 +101,7 @@ define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -16
-; CHECK-NEXT: ret i32 [[TMP2]]
+; CHECK-NEXT: ret i32 0
;
%1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
%2 = and i32 %1, -16
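
All of the folds checked above follow from the MOVMSK semantics: the result can only have bits set below the element count. A small scalar model of the 4-element float case (hypothetical names, purely illustrative, not an LLVM API):

// Illustrative scalar model of llvm.x86.sse.movmsk.ps: extract each element's
// sign bit into the corresponding low bit of a 32-bit result.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t movmsk_ps_model(const float (&v)[4]) {
  uint32_t mask = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t bits;
    std::memcpy(&bits, &v[i], sizeof(bits)); // reinterpret the float's bit pattern
    mask |= (bits >> 31) << i;               // element i's sign bit -> result bit i
  }
  return mask;
}

int main() {
  const float v[4] = {-1.0f, 2.0f, -0.0f, 4.0f};
  uint32_t r = movmsk_ps_model(v);
  assert((r & 15) == r);   // upper tests: 'and i32 %r, 15' never changes the value
  assert((r & ~15u) == 0); // lower tests: 'and i32 %r, -16' is always zero
  return 0;
}

This is why the upper tests now drop the masking 'and' entirely and the lower tests fold to a constant 0.
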