[SROA] fix assetion failure

This patch fixes the assertion failure in SROA reported in PR35657. PR35657 reports the assertion failure due to r319522 (splitting for non-whole-alloca slices), but this problem can happen even without r319522. The problem exists in a check for reusing an existing alloca when rewriting partitions. As the original comment said, we can reuse the existing alloca if the new alloca has the same type and offset with the existing one. But the code checks only type of the alloca and then check the offset using an assert. In a corner case with out-of-bounds access (e.g. @PR35657 function added in unit test), it is possible that the two allocas have the same type but different offsets. This patch makes the check of the offset in the if condition, and re-enables the splitting for non-whole-alloca slices. Differential Revision: https://reviews.llvm.org/D41981 llvm-svn: 322533
author: Hiroshi Inoue <inouehrs@jp.ibm.com> 2018-01-16 06:23:05 +0000
committer: Hiroshi Inoue <inouehrs@jp.ibm.com> 2018-01-16 06:23:05 +0000
commit: 99a8faa6153a1009c17e2b11290828e48e1ea706 (patch)
tree: d8b641d48d898f5648bf9e1fac6752956cd2d678 /llvm/test
parent: 7a0c601f95fcf07346c295298671d439e88e9ef6 (diff)
download: bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.tar.gz
bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.zip
3 files changed, 83 insertions, 17 deletions
diff --git a/llvm/test/DebugInfo/X86/sroasplit-2.ll b/llvm/test/DebugInfo/X86/sroasplit-2.ll
index b2bec7cede0..3e99ec1e16a 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-2.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-2.ll
@@ -21,7 +21,8 @@
 
 ; Verify that SROA creates a variable piece when splitting i1.
 ; CHECK:  call void @llvm.dbg.value(metadata i64 %outer.coerce0, metadata ![[O:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)),
-; CHECK:  call void @llvm.dbg.value(metadata i64 %outer.coerce1, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)),
+; CHECK:  call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32)),
+; CHECK:  call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)),
 ; CHECK:  call void @llvm.dbg.value({{.*}}, metadata ![[I1:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)),
 ; CHECK-DAG: ![[O]] = !DILocalVariable(name: "outer",{{.*}} line: 10
 ; CHECK-DAG: ![[I1]] = !DILocalVariable(name: "i1",{{.*}} line: 11
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index aa00e89ea04..8a30be9bf4f 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -1615,13 +1615,13 @@ define i16 @PR24463() {
 ; Ensure we can handle a very interesting case where there is an integer-based
 ; rewrite of the uses of the alloca, but where one of the integers in that is
 ; a sub-integer that requires extraction *and* extends past the end of the
-; alloca. In this case, we should extract the i8 and then zext it to i16.
+; alloca. SROA can split the alloca to avoid shift or trunc.
 ;
 ; CHECK-LABEL: @PR24463(
 ; CHECK-NOT: alloca
-; CHECK: %[[SHIFT:.*]] = lshr i16 0, 8
-; CHECK: %[[TRUNC:.*]] = trunc i16 %[[SHIFT]] to i8
-; CHECK: %[[ZEXT:.*]] = zext i8 %[[TRUNC]] to i16
+; CHECK-NOT: trunc
+; CHECK-NOT: lshr
+; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16
 ; CHECK: ret i16 %[[ZEXT]]
 entry:
   %alloca = alloca [3 x i8]
@@ -1695,3 +1695,52 @@ bb1:
   call void @llvm.lifetime.end.p0i8(i64 2, i8* %0)
   ret void
 }
+
+; PR35657 reports assertion failure with this code
+define void @PR35657(i64 %v) {
+; CHECK-LABEL: @PR35657
+; CHECK: call void @callee16(i16 %{{.*}})
+; CHECK: call void @callee48(i48 %{{.*}})
+; CHECK: ret void
+entry:
+  %a48 = alloca i48
+  %a48.cast64 = bitcast i48* %a48 to i64*
+  store i64 %v, i64* %a48.cast64
+  %a48.cast16 = bitcast i48* %a48 to i16*
+  %b0_15 = load i16, i16* %a48.cast16
+  %a48.cast8 = bitcast i48* %a48 to i8*
+  %a48_offset2 = getelementptr inbounds i8, i8* %a48.cast8, i64 2
+  %a48_offset2.cast48 = bitcast i8* %a48_offset2 to i48*
+  %b16_63 = load i48, i48* %a48_offset2.cast48, align 2
+  call void @callee16(i16 %b0_15)
+  call void @callee48(i48 %b16_63)
+  ret void
+}
+
+declare void @callee16(i16 %a)
+declare void @callee48(i48 %a)
+
+define void @test28(i64 %v) #0 {
+; SROA should split the first i64 store to avoid additional and/or instructions
+; when storing into i32 fields
+
+; CHECK-LABEL: @test28(
+; CHECK-NOT: alloca
+; CHECK-NOT: and
+; CHECK-NOT: or
+; CHECK:      %[[shift:.*]] = lshr i64 %v, 32
+; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32
+; CHECK-NEXT: ret void
+
+entry:
+  %t = alloca { i64, i32, i32 }
+
+  %b = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 1
+  %0 = bitcast i32* %b to i64*
+  store i64 %v, i64* %0
+
+  %1 = load i32, i32* %b
+  %c = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 2
+  store i32 %1, i32* %c
+  ret void
+}
diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll
index ea41a20fd38..fc4b8b28855 100644
--- a/llvm/test/Transforms/SROA/big-endian.ll
+++ b/llvm/test/Transforms/SROA/big-endian.ll
@@ -83,19 +83,34 @@ entry:
   store i16 1, i16* %a0i16ptr
 
   store i8 1, i8* %a2ptr
-; CHECK:      %[[mask1:.*]] = and i40 undef, 4294967295
-; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
 
   %a3i24ptr = bitcast i8* %a3ptr to i24*
   store i24 1, i24* %a3i24ptr
-; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
-; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
 
   %a2i40ptr = bitcast i8* %a2ptr to i40*
   store i40 1, i40* %a2i40ptr
-; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
-; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
-; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
+
+; the alloca is splitted into multiple slices
+; Here, i8 1 is for %a[6]
+; CHECK: %[[ext1:.*]] = zext i8 1 to i40
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]]
+
+; Here, i24 0 is for %a[3] to %a[5]
+; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40
+; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]]
+
+; Here, i8 0 is for %a[2]
+; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40
+; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32
+; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295
+; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]]
+
+; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56
+; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]]
 
 ; CHECK-NOT: store
 ; CHECK-NOT: load
@@ -104,11 +119,12 @@ entry:
   %ai = load i56, i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
-; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
-; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
-; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
-; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
-; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
+; Here, i16 1 is for %a[0] to %a[1]
+; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56
+; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40
+; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775
+; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64
 ; CHECK-NEXT: ret i64 %[[ret]]
 }
author	Hiroshi Inoue <inouehrs@jp.ibm.com>	2018-01-16 06:23:05 +0000
committer	Hiroshi Inoue <inouehrs@jp.ibm.com>	2018-01-16 06:23:05 +0000
commit	99a8faa6153a1009c17e2b11290828e48e1ea706 (patch)
tree	d8b641d48d898f5648bf9e1fac6752956cd2d678 /llvm/test
parent	7a0c601f95fcf07346c295298671d439e88e9ef6 (diff)
download	bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.tar.gz bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.zip