summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-12-20 19:25:33 +0000
committerCraig Topper <craig.topper@intel.com>2017-12-20 19:25:33 +0000
commit07820f2fe4482a58031f4101d12b3abb092fc4e2 (patch)
tree7b551eee71026b8a7b2bd0108e0ac9405c6aca86
parentd60951f469919476dfae4ca3f5ccb153c7072f47 (diff)
downloadbcm5719-llvm-07820f2fe4482a58031f4101d12b3abb092fc4e2.tar.gz
bcm5719-llvm-07820f2fe4482a58031f4101d12b3abb092fc4e2.zip
[X86] Remove zext from vXi32 to vXi64 on indices of gather/scatter instructions if we can prove the pre-extended value is non-negative.
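Gather/scatter instructions can implicitly sign extend their indices from i32 to i64. If we know the sign bit of the input to a zext is 0, sign extension and zero extension produce the same value, so we can drop the explicit zext and rely on the implicit extension instead. llvm-svn: 321209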
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp17
-rw-r--r--llvm/test/CodeGen/X86/masked_gather_scatter.ll45
2 files changed, 62 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fec1a0c792d..edfebbf4b83 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36273,6 +36273,23 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
+
+ // Try to remove zero extends from 32->64 if we know the sign bit of
+ // the input is zero.
+ if (Index.getOpcode() == ISD::ZERO_EXTEND &&
+ Index.getScalarValueSizeInBits() == 64 &&
+ Index.getOperand(0).getScalarValueSizeInBits() == 32) {
+ if (DAG.SignBitIsZero(Index.getOperand(0))) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ NewOps[4] = Index.getOperand(0);
+ DAG.UpdateNodeOperands(N, NewOps);
+ // The original zero extend now has fewer users; add it back to the worklist
+ // in case it can be removed.
+ DCI.AddToWorklist(Index.getNode());
+ DCI.AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
}
// Gather and Scatter instructions use k-registers for masks. The type of
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index f70b6024241..b2842e556fd 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2742,3 +2742,48 @@ define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32>
}
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)
+define <16 x float> @zext_index(float* %base, <16 x i32> %ind) {
+; KNL_64-LABEL: zext_index:
+; KNL_64: # %bb.0:
+; KNL_64-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: zext_index:
+; KNL_32: # %bb.0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX_SMALL-LABEL: zext_index:
+; SKX_SMALL: # %bb.0:
+; SKX_SMALL-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: zext_index:
+; SKX_LARGE: # %bb.0:
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vandps (%rax){1to16}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; SKX_LARGE-NEXT: retq
+;
+; SKX_32-LABEL: zext_index:
+; SKX_32: # %bb.0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; SKX_32-NEXT: retl
+ %ind_masked = and <16 x i32> %ind, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %sext_ind = zext <16 x i32> %ind_masked to <16 x i64>
+ %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
OpenPOWER on IntegriCloud