diff options
| author | Hiroshi Inoue <inouehrs@jp.ibm.com> | 2018-01-16 06:23:05 +0000 |
|---|---|---|
| committer | Hiroshi Inoue <inouehrs@jp.ibm.com> | 2018-01-16 06:23:05 +0000 |
| commit | 99a8faa6153a1009c17e2b11290828e48e1ea706 (patch) | |
| tree | d8b641d48d898f5648bf9e1fac6752956cd2d678 /llvm/test | |
| parent | 7a0c601f95fcf07346c295298671d439e88e9ef6 (diff) | |
| download | bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.tar.gz bcm5719-llvm-99a8faa6153a1009c17e2b11290828e48e1ea706.zip | |
[SROA] fix assertion failure
This patch fixes the assertion failure in SROA reported in PR35657.
PR35657 reports the assertion failure due to r319522 (splitting for non-whole-alloca slices), but this problem can happen even without r319522.
The problem is in the check for reusing an existing alloca when rewriting partitions. As the original comment says, we can reuse the existing alloca if the new alloca has the same type and offset as the existing one. However, the code checks only the type of the alloca in the condition and then checks the offset using an assert.
In a corner case with an out-of-bounds access (e.g. the @PR35657 function added to the unit test), it is possible that the two allocas have the same type but different offsets.
This patch moves the offset check into the if condition and re-enables splitting for non-whole-alloca slices.
Differential Revision: https://reviews.llvm.org/D41981
llvm-svn: 322533
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/DebugInfo/X86/sroasplit-2.ll | 3 | ||||
| -rw-r--r-- | llvm/test/Transforms/SROA/basictest.ll | 57 | ||||
| -rw-r--r-- | llvm/test/Transforms/SROA/big-endian.ll | 40 |
3 files changed, 83 insertions, 17 deletions
diff --git a/llvm/test/DebugInfo/X86/sroasplit-2.ll b/llvm/test/DebugInfo/X86/sroasplit-2.ll index b2bec7cede0..3e99ec1e16a 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-2.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-2.ll @@ -21,7 +21,8 @@ ; Verify that SROA creates a variable piece when splitting i1. ; CHECK: call void @llvm.dbg.value(metadata i64 %outer.coerce0, metadata ![[O:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), -; CHECK: call void @llvm.dbg.value(metadata i64 %outer.coerce1, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), +; CHECK: call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32)), +; CHECK: call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)), ; CHECK: call void @llvm.dbg.value({{.*}}, metadata ![[I1:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), ; CHECK-DAG: ![[O]] = !DILocalVariable(name: "outer",{{.*}} line: 10 ; CHECK-DAG: ![[I1]] = !DILocalVariable(name: "i1",{{.*}} line: 11 diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index aa00e89ea04..8a30be9bf4f 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -1615,13 +1615,13 @@ define i16 @PR24463() { ; Ensure we can handle a very interesting case where there is an integer-based ; rewrite of the uses of the alloca, but where one of the integers in that is ; a sub-integer that requires extraction *and* extends past the end of the -; alloca. In this case, we should extract the i8 and then zext it to i16. +; alloca. SROA can split the alloca to avoid shift or trunc. 
; ; CHECK-LABEL: @PR24463( ; CHECK-NOT: alloca -; CHECK: %[[SHIFT:.*]] = lshr i16 0, 8 -; CHECK: %[[TRUNC:.*]] = trunc i16 %[[SHIFT]] to i8 -; CHECK: %[[ZEXT:.*]] = zext i8 %[[TRUNC]] to i16 +; CHECK-NOT: trunc +; CHECK-NOT: lshr +; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16 ; CHECK: ret i16 %[[ZEXT]] entry: %alloca = alloca [3 x i8] @@ -1695,3 +1695,52 @@ bb1: call void @llvm.lifetime.end.p0i8(i64 2, i8* %0) ret void } + +; PR35657 reports assertion failure with this code +define void @PR35657(i64 %v) { +; CHECK-LABEL: @PR35657 +; CHECK: call void @callee16(i16 %{{.*}}) +; CHECK: call void @callee48(i48 %{{.*}}) +; CHECK: ret void +entry: + %a48 = alloca i48 + %a48.cast64 = bitcast i48* %a48 to i64* + store i64 %v, i64* %a48.cast64 + %a48.cast16 = bitcast i48* %a48 to i16* + %b0_15 = load i16, i16* %a48.cast16 + %a48.cast8 = bitcast i48* %a48 to i8* + %a48_offset2 = getelementptr inbounds i8, i8* %a48.cast8, i64 2 + %a48_offset2.cast48 = bitcast i8* %a48_offset2 to i48* + %b16_63 = load i48, i48* %a48_offset2.cast48, align 2 + call void @callee16(i16 %b0_15) + call void @callee48(i48 %b16_63) + ret void +} + +declare void @callee16(i16 %a) +declare void @callee48(i48 %a) + +define void @test28(i64 %v) #0 { +; SROA should split the first i64 store to avoid additional and/or instructions +; when storing into i32 fields + +; CHECK-LABEL: @test28( +; CHECK-NOT: alloca +; CHECK-NOT: and +; CHECK-NOT: or +; CHECK: %[[shift:.*]] = lshr i64 %v, 32 +; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32 +; CHECK-NEXT: ret void + +entry: + %t = alloca { i64, i32, i32 } + + %b = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 1 + %0 = bitcast i32* %b to i64* + store i64 %v, i64* %0 + + %1 = load i32, i32* %b + %c = getelementptr { i64, i32, i32 }, { i64, i32, i32 }* %t, i32 0, i32 2 + store i32 %1, i32* %c + ret void +} diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll index ea41a20fd38..fc4b8b28855 100644 --- 
a/llvm/test/Transforms/SROA/big-endian.ll +++ b/llvm/test/Transforms/SROA/big-endian.ll @@ -83,19 +83,34 @@ entry: store i16 1, i16* %a0i16ptr store i8 1, i8* %a2ptr -; CHECK: %[[mask1:.*]] = and i40 undef, 4294967295 -; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296 %a3i24ptr = bitcast i8* %a3ptr to i24* store i24 1, i24* %a3i24ptr -; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041 -; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256 %a2i40ptr = bitcast i8* %a2ptr to i40* store i40 1, i40* %a2i40ptr -; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56 -; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776 -; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]] + +; the alloca is splitted into multiple slices +; Here, i8 1 is for %a[6] +; CHECK: %[[ext1:.*]] = zext i8 1 to i40 +; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, -256 +; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], %[[ext1]] + +; Here, i24 0 is for %a[3] to %a[5] +; CHECK-NEXT: %[[ext2:.*]] = zext i24 0 to i40 +; CHECK-NEXT: %[[shift2:.*]] = shl i40 %[[ext2]], 8 +; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041 +; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], %[[shift2]] + +; Here, i8 0 is for %a[2] +; CHECK-NEXT: %[[ext3:.*]] = zext i8 0 to i40 +; CHECK-NEXT: %[[shift3:.*]] = shl i40 %[[ext3]], 32 +; CHECK-NEXT: %[[mask3:.*]] = and i40 %[[insert2]], 4294967295 +; CHECK-NEXT: %[[insert3:.*]] = or i40 %[[mask3]], %[[shift3]] + +; CHECK-NEXT: %[[ext4:.*]] = zext i40 %[[insert3]] to i56 +; CHECK-NEXT: %[[mask4:.*]] = and i56 undef, -1099511627776 +; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[ext4]] ; CHECK-NOT: store ; CHECK-NOT: load @@ -104,11 +119,12 @@ entry: %ai = load i56, i56* %aiptr %ret = zext i56 %ai to i64 ret i64 %ret -; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56 -; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40 -; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775 -; CHECK-NEXT: %[[insert4:.*]] = or i56 
%[[mask4]], %[[shift4]] -; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64 +; Here, i16 1 is for %a[0] to %a[1] +; CHECK-NEXT: %[[ext5:.*]] = zext i16 1 to i56 +; CHECK-NEXT: %[[shift5:.*]] = shl i56 %[[ext5]], 40 +; CHECK-NEXT: %[[mask5:.*]] = and i56 %[[insert4]], 1099511627775 +; CHECK-NEXT: %[[insert5:.*]] = or i56 %[[mask5]], %[[shift5]] +; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert5]] to i64 ; CHECK-NEXT: ret i64 %[[ret]] } |

