diff options
6 files changed, 39 insertions, 34 deletions
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4a875311881..ad86c8c2828 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -891,13 +891,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Clear the inbounds attribute because the new index may be off-bound. // e.g., // - // b = add i64 a, 5 - // addr = gep inbounds float* p, i64 b + // b = add i64 a, 5 + // addr = gep inbounds float, float* p, i64 b // // is transformed to: // - // addr2 = gep float* p, i64 a - // addr = gep float* addr2, i64 5 + // addr2 = gep float, float* p, i64 a ; inbounds removed + // addr = gep inbounds float, float* addr2, i64 5 // // If a is -4, although the old index b is in bounds, the new index a is // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the @@ -907,6 +907,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // // TODO(jingyue): do some range analysis to keep as many inbounds as // possible. GEPs with inbounds are more friendly to alias analysis. + bool GEPWasInBounds = GEP->isInBounds(); GEP->setIsInBounds(false); // Lowers a GEP to either GEPs with a single index or arithmetic operations. @@ -968,6 +969,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + // Inherit the inbounds attribute of the original GEP. + cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); } else { // Unlikely but possible. For example, // #pragma pack(1) @@ -990,6 +993,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { Type::getInt8Ty(GEP->getContext()), NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", GEP); + // Inherit the inbounds attribute of the original GEP. + cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); if (GEP->getType() != I8PtrTy) NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); } diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll index 527634db0f5..6f117697dde 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll @@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; IR-LABEL: @sum_of_array( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33 +; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1 +; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32 +; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33 define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { %tmp = sext i32 %y to i64 %tmp1 = sext i32 %x to i64 @@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) ; IR-LABEL: @sum_of_array_over_max_mubuf_offset( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255 +; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255 ; IR: add i32 %x, 256 ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} @@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace( ; DS instructions have a larger immediate offset, so make sure these are OK. ; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383 define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y %tmp4 = load float, float addrspace(3)* %tmp2, align 4 diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 073313d40e7..a0410024f6e 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) { ; IR-LABEL: @sum_of_array( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33 ; @sum_of_array2 is very similar to @sum_of_array. The only difference is in ; the order of "sext" and "add" when computing the array indices. @sum_of_array @@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) { ; IR-LABEL: @sum_of_array2( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33 ; This function loads @@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) { ; IR-LABEL: @sum_of_array3( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33 ; This function loads @@ -191,6 +191,6 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) { ; IR-LABEL: @sum_of_array4( ; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32 -; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32 +; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33 diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 2fdd158a35e..eeeac196374 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -44,7 +44,7 @@ entry: ; CHECK: add i32 %j, -2 ; CHECK: sext ; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32 +; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32 ; We should be able to trace into sext/zext if it can be distributed to both ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b) @@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) { } ; CHECK-LABEL: @ext_add_no_overflow( ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33 +; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33 ; Verifies we handle nested sext/zext correctly. define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) { @@ -110,7 +110,7 @@ entry: } ; CHECK-LABEL: @sext_or( ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32 +; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32 ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b + ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't @@ -125,7 +125,7 @@ entry: } ; CHECK-LABEL: @expr( ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0 -; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160 +; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160 ; CHECK: store i64 %b5, i64* %out ; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8 @@ -143,7 +143,7 @@ entry: ; CHECK: sext i32 ; CHECK: sext i32 ; CHECK: sext i32 -; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8 +; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8 ; Verifies we handle "sub" correctly. define float* @sub(i64 %i, i64 %j) { @@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) { ; CHECK-LABEL: @sub( ; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]] -; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155 +; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155 %struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed @@ -173,7 +173,7 @@ entry: ; CHECK-LABEL: @packed_struct( ; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}} ; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8* -; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100 +; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100 ; CHECK: bitcast i8* %uglygep to i64* ; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))", @@ -272,7 +272,7 @@ entry: %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 ; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1 ; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8* -; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64 +; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64 ; CHECK: bitcast ret %struct2* %ptr2 ; CHECK-NEXT: ret diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll index d1a0f33d5a2..601ca529135 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll @@ -25,7 +25,7 @@ then: %or = or i64 %i, 3 %p = getelementptr inbounds float, float* %input, i64 %or ; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i -; CHECK: getelementptr float, float* [[base]], i64 3 +; CHECK: getelementptr inbounds float, float* [[base]], i64 3 ret float* %p exit: diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll index 278250a9c80..f2853aca698 100644 --- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll @@ -57,10 +57,10 @@ bb: ; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset( ; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i -; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383 +; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383 ; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i -; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383 +; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383 define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { bb: %i2 = shl nsw i32 %i, 1 |