VX512] Only look at lower bit in constant scalar masks

for scalar masked instructions only the lower bit of the mask is relevant. so for constant masks we should either do an unmasked operation or no operation, depending on the value of the lower bit. This patch handles cases where the lower bit is '1'. Differential Revision: https://reviews.llvm.org/D32805 llvm-svn: 302546
author: Guy Blank <guy.blank@intel.com> 2017-05-09 16:16:48 +0000
committer: Guy Blank <guy.blank@intel.com> 2017-05-09 16:16:48 +0000
commit: 0c42d8c35ba6acd4d3620915c64af5a7badc1e73 (patch)
tree: a390f5b8f17ecfe6f12109bb6ba2cd204ec72e75
parent: f7f6f823a4d71e7febbeaf1db4fccbb5a83005d3 (diff)
download: bcm5719-llvm-0c42d8c35ba6acd4d3620915c64af5a7badc1e73.tar.gz
bcm5719-llvm-0c42d8c35ba6acd4d3620915c64af5a7badc1e73.zip
2 files changed, 10 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0c1e0feed8..68d11100f21 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19021,8 +19021,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                     SDValue PreservedSrc,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
-  if (isAllOnesConstant(Mask))
-    return Op;
+
+  if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
+    if (MaskConst->getZExtValue() & 0x1)
+      return Op;
 
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
diff --git a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
index e8f227bec48..47c6813fa8d 100644
--- a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
+++ b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
@@ -26,6 +26,7 @@ define <4 x float>@test_var_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float>
   ret < 4 x float> %res
 }
 
+; FIXME: we should just return %xmm0 here.
 define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const0_mask:
 ; CHECK:       ## BB#0:
@@ -36,6 +37,7 @@ define <4 x float>@test_const0_mask(<4 x float> %v0, <4 x float> %v1, <4 x float
   ret < 4 x float> %res
 }
 
+; FIXME: we should zero the lower element of xmm0 and return it.
 define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const0_maskz:
 ; CHECK:       ## BB#0:
@@ -46,6 +48,7 @@ define <4 x float>@test_const0_maskz(<4 x float> %v0, <4 x float> %v1, <4 x floa
   ret < 4 x float> %res
 }
 
+; FIXME: we should just return %xmm0 here.
 define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const2_mask:
 ; CHECK:       ## BB#0:
@@ -56,6 +59,7 @@ define <4 x float>@test_const2_mask(<4 x float> %v0, <4 x float> %v1, <4 x float
   ret < 4 x float> %res
 }
 
+; FIXME: we should zero the lower element of xmm0 and return it.
 define <4 x float>@test_const2_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const2_maskz:
 ; CHECK:       ## BB#0:
@@ -87,9 +91,7 @@ define <4 x float>@test_const_allone_maskz(<4 x float> %v0, <4 x float> %v1, <4
 define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const_3_mask:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k0, %k0, %k0
-; CHECK-NEXT:    kshiftrw $15, %k0, %k1
-; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2,  i8 3, i32 4)
   ret < 4 x float> %res
@@ -98,9 +100,7 @@ define <4 x float>@test_const_3_mask(<4 x float> %v0, <4 x float> %v1, <4 x floa
 define <4 x float>@test_const_3_maskz(<4 x float> %v0, <4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: test_const_3_maskz:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    kxnorw %k0, %k0, %k0
-; CHECK-NEXT:    kshiftrw $15, %k0, %k1
-; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %v0,<4 x float> %v1, <4 x float> %v2,  i8 3, i32 4)
   ret < 4 x float> %res
author	Guy Blank <guy.blank@intel.com>	2017-05-09 16:16:48 +0000
committer	Guy Blank <guy.blank@intel.com>	2017-05-09 16:16:48 +0000
commit	0c42d8c35ba6acd4d3620915c64af5a7badc1e73 (patch)
tree	a390f5b8f17ecfe6f12109bb6ba2cd204ec72e75
parent	f7f6f823a4d71e7febbeaf1db4fccbb5a83005d3 (diff)
download	bcm5719-llvm-0c42d8c35ba6acd4d3620915c64af5a7badc1e73.tar.gz bcm5719-llvm-0c42d8c35ba6acd4d3620915c64af5a7badc1e73.zip