diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 71 | ||||
| -rw-r--r-- | llvm/test/Transforms/ScalarRepl/copy-aggregate.ll | 29 |
2 files changed, 83 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ec298e7316d..0ec095749f9 100644 --- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1076,6 +1076,46 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, MarkUnsafe(Info); } +/// isHomogeneousAggregate - Check if type T is a struct or array containing +/// elements of the same type (which is always true for arrays). If so, +/// return true with NumElts and EltTy set to the number of elements and the +/// element type, respectively. +static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, + const Type *&EltTy) { + if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + NumElts = AT->getNumElements(); + EltTy = AT->getElementType(); + return true; + } + if (const StructType *ST = dyn_cast<StructType>(T)) { + NumElts = ST->getNumContainedTypes(); + EltTy = ST->getContainedType(0); + for (unsigned n = 1; n < NumElts; ++n) { + if (ST->getContainedType(n) != EltTy) + return false; + } + return true; + } + return false; +} + +/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are +/// "homogeneous" aggregates with the same element type and number of elements. +static bool isCompatibleAggregate(const Type *T1, const Type *T2) { + if (T1 == T2) + return true; + + unsigned NumElts1, NumElts2; + const Type *EltTy1, *EltTy2; + if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && + isHomogeneousAggregate(T2, NumElts2, EltTy2) && + NumElts1 == NumElts2 && + EltTy1 == EltTy2) + return true; + + return false; +} + /// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI /// alloca or has an offset and size that corresponds to a component element /// within it. The offset checked here may have been formed from a GEP with a @@ -1085,20 +1125,23 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, AllocaInfo &Info) { // Check if this is a load/store of the entire alloca. if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. - if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } + // This can be safe for MemIntrinsics (where MemOpType is 0) and integer + // loads/stores (which are essentially the same as the MemIntrinsics with + // regard to copying padding between elements). But, if an alloca is + // flagged as both a source and destination of such operations, we'll need + // to check later for padding between elements. + if (!MemOpType || MemOpType->isIntegerTy()) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; return; } + // This is also safe for references using a type that is compatible with + // the type of the alloca, so that loads/stores can be rewritten using + // insertvalue/extractvalue. + if (isCompatibleAggregate(MemOpType, AI->getAllocatedType())) + return; } // Check if the offset/size correspond to a component within the alloca type. const Type *T = AI->getAllocatedType(); @@ -1159,7 +1202,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // address operand will be updated, so nothing else needs to be done. } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { + if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: // %res = load { i32, i32 }* %alloc // with: @@ -1184,7 +1227,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { + if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc // with: diff --git a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll index 6e01aa3c518..997da4bdb2f 100644 --- a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll +++ b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll @@ -76,10 +76,33 @@ entry: %var = alloca %arr, align 4 %vari8 = bitcast %arr* %var to i8* %pi8 = bitcast %arr* %p to i8* - call void @llvm.memcpy.i32(i8* %vari8, i8* %pi8, i32 16, i32 4) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false) %qi8 = bitcast %arr* %q to i8* - call void @llvm.memcpy.i32(i8* %qi8, i8* %vari8, i32 16, i32 4) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false) ret void } -declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind +;; Check that an array alloca can be split up when it is also accessed with +;; a load or store as a homogeneous structure with the same element type and +;; number of elements as the array. +%homogeneous = type { <8 x i16>, <8 x i16>, <8 x i16> } +%wrapped_array = type { [3 x <8 x i16>] } +define void @test6(i8* %p, %wrapped_array* %arr) { +entry: +; CHECK: test6 +; CHECK: store <8 x i16> +; CHECK: store <8 x i16> +; CHECK: store <8 x i16> + %var = alloca %wrapped_array, align 16 + %res = call %homogeneous @test6callee(i8* %p) + %varcast = bitcast %wrapped_array* %var to %homogeneous* + store %homogeneous %res, %homogeneous* %varcast + %tmp1 = bitcast %wrapped_array* %arr to i8* + %tmp2 = bitcast %wrapped_array* %var to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false) + ret void +} + +declare %homogeneous @test6callee(i8* nocapture) nounwind + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind |

