diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 44 | ||||
-rw-r--r-- | llvm/test/Transforms/SROA/alignment.ll | 63 |
2 files changed, 87 insertions, 20 deletions
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 316742a0da6..58bae0971e3 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2170,18 +2170,36 @@ private: return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); } - unsigned getAdjustedAlign(uint64_t Offset) { + /// \brief Compute suitable alignment to access an offset into the new alloca. + unsigned getOffsetAlign(uint64_t Offset) { unsigned NewAIAlign = NewAI.getAlignment(); if (!NewAIAlign) NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType()); return MinAlign(NewAIAlign, Offset); } - unsigned getAdjustedAlign() { - return getAdjustedAlign(BeginOffset - NewAllocaBeginOffset); + + /// \brief Compute suitable alignment to access this partition of the new + /// alloca. + unsigned getPartitionAlign() { + return getOffsetAlign(BeginOffset - NewAllocaBeginOffset); } - bool isTypeAlignSufficient(Type *Ty) { - return TD.getABITypeAlignment(Ty) >= getAdjustedAlign(); + /// \brief Compute suitable alignment to access a type at an offset of the + /// new alloca. + /// + /// \returns zero if the type's ABI alignment is a suitable alignment, + /// otherwise returns the maximal suitable alignment. + unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) { + unsigned Align = getOffsetAlign(Offset); + return Align == TD.getABITypeAlignment(Ty) ? 0 : Align; + } + + /// \brief Compute suitable alignment to access a type at the beginning of + /// this partition of the new alloca. + /// + /// See \c getOffsetTypeAlign for details; this routine delegates to it. + unsigned getPartitionTypeAlign(Type *Ty) { + return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset); } ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) { @@ -2292,8 +2310,7 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); - if (LI.getAlignment() || !isTypeAlignSufficient(LI.getType())) - LI.setAlignment(getAdjustedAlign()); + LI.setAlignment(getPartitionTypeAlign(LI.getType())); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2345,12 +2362,7 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); - if (SI.getAlignment() || - !isTypeAlignSufficient(SI.getValueOperand()->getType())) - SI.setAlignment(getAdjustedAlign()); - if (SI.getAlignment()) - SI.setAlignment(MinAlign(NewAI.getAlignment(), - BeginOffset - NewAllocaBeginOffset)); + SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType())); DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2367,7 +2379,7 @@ private: if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment(ConstantInt::get(CstTy, getAdjustedAlign())); + II.setAlignment(ConstantInt::get(CstTy, getPartitionAlign())); deleteIfTriviallyDead(OldPtr); return false; @@ -2391,7 +2403,7 @@ private: CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, getAdjustedAlign(), + II.getValue(), Size, getPartitionAlign(), II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2481,7 +2493,7 @@ private: unsigned Align = II.getAlignment(); if (Align > 1) Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), - MinAlign(II.getAlignment(), getAdjustedAlign())); + MinAlign(II.getAlignment(), getPartitionAlign())); // For unsplit intrinsics, we simply modify the source and destination // pointers in place. This isn't just an optimization, it is a matter of diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index 4f35a8a0ee3..f8f3270dbb9 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -31,8 +31,8 @@ entry: define void @test2() { ; CHECK: @test2 ; CHECK: alloca i16 -; CHECK: load i8* %{{.*}}, align 1 -; CHECK: store i8 42, i8* %{{.*}}, align 1 +; CHECK: load i8* %{{.*}} +; CHECK: store i8 42, i8* %{{.*}} ; CHECK: ret void entry: @@ -41,8 +41,8 @@ entry: %cast1 = bitcast i8* %gep1 to i16* store volatile i16 0, i16* %cast1 %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2 - %result = load i8* %gep2, align 2 - store i8 42, i8* %gep2, align 2 + %result = load i8* %gep2 + store i8 42, i8* %gep2 ret void } @@ -114,3 +114,58 @@ entry: ret void } + +define void @test5() { +; Test that we preserve underaligned loads and stores when splitting. +; CHECK: @test5 +; CHECK: alloca [9 x i8] +; CHECK: alloca [9 x i8] +; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1 +; CHECK: load i16* %{{.*}}, align 1 +; CHECK: load double* %{{.*}}, align 1 +; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1 +; CHECK: load i16* %{{.*}}, align 1 +; CHECK: ret void + +entry: + %a = alloca [18 x i8] + %raw1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 0 + %ptr1 = bitcast i8* %raw1 to double* + store volatile double 0.0, double* %ptr1, align 1 + %weird_gep1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 7 + %weird_cast1 = bitcast i8* %weird_gep1 to i16* + %weird_load1 = load i16* %weird_cast1, align 1 + + %raw2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 9 + %ptr2 = bitcast i8* %raw2 to double* + %d1 = load double* %ptr1, align 1 + store volatile double %d1, double* %ptr2, align 1 + %weird_gep2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 16 + %weird_cast2 = bitcast i8* %weird_gep2 to i16* + %weird_load2 = load i16* %weird_cast2, align 1 + + ret void +} + +define void @test6() { +; Test that we promote alignment when the underlying alloca switches to one +; that innately provides it. +; CHECK: @test6 +; CHECK: alloca double +; CHECK: alloca double +; CHECK-NOT: align +; CHECK: ret void + +entry: + %a = alloca [16 x i8] + %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0 + %ptr1 = bitcast i8* %raw1 to double* + store volatile double 0.0, double* %ptr1, align 1 + + %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8 + %ptr2 = bitcast i8* %raw2 to double* + %val = load double* %ptr1, align 1 + store volatile double %val, double* %ptr2, align 1 + + ret void +} |