summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-07-27 00:00:30 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-07-27 00:00:30 +0000
commit: f7bc55022338a7915ede107fe8cf74065656e114 (patch)
tree: 8bafaae16cf13cbcac47153a96af30b4a0eae2aa /llvm/test/CodeGen
parent: dd0b344339526b91b1a9703122743d1a4a8f5157 (diff)
download: bcm5719-llvm-f7bc55022338a7915ede107fe8cf74065656e114.tar.gz
download: bcm5719-llvm-f7bc55022338a7915ede107fe8cf74065656e114.zip
[X86] When removing sign extends from gather/scatter indices, make sure we handle UpdateNodeOperands finding an existing node to CSE with.
If this happens, the operands aren't updated and the existing node is returned instead. Make sure we pass this existing node up to the DAG combiner so that a proper replacement happens; otherwise we get stuck in an infinite loop with an unoptimized node.
llvm-svn: 338090
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/X86/masked_gather_scatter.ll 51
1 files changed, 51 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 6bd8f9295a6..bba1b392068 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2928,3 +2928,54 @@ define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32>
call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %gep.random, i32 4, <16 x i1> %mask)
ret void
}
+
+; This test case previously triggered an infinite loop when the two gathers became identical after DAG combine removed the sign extend. The expected output for every run prefix below contains a single vgatherdps whose result is added to itself, i.e. the two gathers end up merged into one.
+define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %foo) {
+; KNL_64-LABEL: test_sext_cse:
+; KNL_64: # %bb.0:
+; KNL_64-NEXT: vmovaps %zmm0, (%rsi)
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vaddps %zmm1, %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_sext_cse:
+; KNL_32: # %bb.0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; KNL_32-NEXT: vmovaps %zmm0, (%ecx)
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vaddps %zmm1, %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_sext_cse:
+; SKX: # %bb.0:
+; SKX-NEXT: vmovaps %zmm0, (%rsi)
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vaddps %zmm1, %zmm1, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_sext_cse:
+; SKX_32: # %bb.0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SKX_32-NEXT: vmovaps %zmm0, (%ecx)
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vaddps %zmm1, %zmm1, %zmm0
+; SKX_32-NEXT: retl
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; splat %base into all 16 lanes
+; First address vector: the i32 indices are explicitly sign-extended to i64 before the GEP.
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+
+ store <16 x i32> %ind, <16 x i32>* %foo
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %gep.random2 = getelementptr float, <16 x float*> %broadcast.splat, <16 x i32> %ind ; second address vector: same splat base, i32 indices used directly (no explicit sext)
+ %res2 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random2, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ %res3 = fadd <16 x float> %res2, %res ; uses both gather results so neither call is dead
+ ret <16 x float>%res3
+}
OpenPOWER on IntegriCloud