author     Evgeniy Stepanov <eugeni.stepanov@gmail.com>   2018-07-14 01:20:53 +0000
committer  Evgeniy Stepanov <eugeni.stepanov@gmail.com>   2018-07-14 01:20:53 +0000
commit     1971ba097d93cdb33672744075272728317f1dce
tree       d858ccdd545692772fd2f02feb6b67f14f0d4ea8 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
parent     a24fe067c0ae7664a662117f0fe3e4cdad0e7e43
Revert "AMDGPU: Fix handling of alignment padding in DAG argument lowering"
This reverts commit r337021, which triggered the following MemorySanitizer report:

WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0x1415cd65 in void write_signed<long>(llvm::raw_ostream&, long, unsigned long, llvm::IntegerStyle) /code/llvm-project/llvm/lib/Support/NativeFormatting.cpp:95:7
#1 0x1415c900 in llvm::write_integer(llvm::raw_ostream&, long, unsigned long, llvm::IntegerStyle) /code/llvm-project/llvm/lib/Support/NativeFormatting.cpp:121:3
#2 0x1472357f in llvm::raw_ostream::operator<<(long) /code/llvm-project/llvm/lib/Support/raw_ostream.cpp:117:3
#3 0x13bb9d4 in llvm::raw_ostream::operator<<(int) /code/llvm-project/llvm/include/llvm/Support/raw_ostream.h:210:18
#4 0x3c2bc18 in void printField<unsigned int, &(amd_kernel_code_s::amd_kernel_code_version_major)>(llvm::StringRef, amd_kernel_code_s const&, llvm::raw_ostream&) /code/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp:78:23
#5 0x3c250ba in llvm::printAmdKernelCodeField(amd_kernel_code_s const&, int, llvm::raw_ostream&) /code/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp:104:5
#6 0x3c27ca3 in llvm::dumpAmdKernelCode(amd_kernel_code_s const*, llvm::raw_ostream&, char const*) /code/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp:113:5
#7 0x3a46e6c in llvm::AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(amd_kernel_code_s const&) /code/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp:161:3
#8 0xd371e4 in llvm::AMDGPUAsmPrinter::EmitFunctionBodyStart() /code/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp:204:26
[...]
Uninitialized value was created by an allocation of 'KernelCode' in the stack frame of function '_ZN4llvm16AMDGPUAsmPrinter21EmitFunctionBodyStartEv'
#0 0xd36650 in llvm::AMDGPUAsmPrinter::EmitFunctionBodyStart() /code/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp:192
llvm-svn: 337079
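The report above is the usual MemorySanitizer pattern: a struct is created on the stack without an initializer and one of its fields is then read while formatting it. Here that struct is the `KernelCode` local of type `amd_kernel_code_t`. The following is a minimal standalone sketch of that pattern, not the LLVM code; `KernelDescriptor` and `printField` are hypothetical stand-ins, and building with `clang++ -fsanitize=memory -g` should produce the same class of warning.

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for amd_kernel_code_t: a plain struct whose fields
// are only meaningful if someone remembers to fill them in.
struct KernelDescriptor {
  uint32_t VersionMajor;
  uint32_t VersionMinor;
};

// Stand-in for the printField<> helper: formats one field into a stream.
// Branching on Value is a "use" that MemorySanitizer checks.
static void printField(const char *Name, uint32_t Value, std::ostream &OS) {
  if (Value != 0)
    OS << Name << " = " << Value << '\n';
  else
    OS << Name << " = 0\n";
}

int main() {
  KernelDescriptor KernelCode;     // no initializer: fields hold garbage
  // KernelCode.VersionMajor = 1;  // the missing initialization
  printField("amd_kernel_code_version_major", KernelCode.VersionMajor, std::cout);
  // Under -fsanitize=memory the read of VersionMajor is reported as a
  // use-of-uninitialized-value originating from the 'KernelCode' stack object.
}
```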
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp   183
1 file changed, 75 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 583a09e34ab..acdedab7e13 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -30,7 +30,6 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -41,6 +40,18 @@
 #include "llvm/Support/KnownBits.h"
 
 using namespace llvm;
+static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                            CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  MachineFunction &MF = State.getMachineFunction();
+  AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+
+  uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
+                                         ArgFlags.getOrigAlign());
+  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
 static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo,
                            ISD::ArgFlagsTy ArgFlags, CCState &State,
@@ -899,118 +910,74 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
 /// for each individual part is i8. We pass the memory type as LocVT to the
 /// calling convention analysis function and the register type (Ins[x].VT) as
 /// the ValVT.
-void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
-    CCState &State,
-    const SmallVectorImpl<ISD::InputArg> &Ins) const {
-  const MachineFunction &MF = State.getMachineFunction();
-  const Function &Fn = MF.getFunction();
-  LLVMContext &Ctx = Fn.getParent()->getContext();
-  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
-  const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
-
-  unsigned MaxAlign = 1;
-  uint64_t ExplicitArgOffset = 0;
-  const DataLayout &DL = Fn.getParent()->getDataLayout();
-
-  unsigned InIndex = 0;
-
-  for (const Argument &Arg : Fn.args()) {
-    Type *BaseArgTy = Arg.getType();
-    unsigned Align = DL.getABITypeAlignment(BaseArgTy);
-    MaxAlign = std::max(Align, MaxAlign);
-    unsigned AllocSize = DL.getTypeAllocSize(BaseArgTy);
-
-    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Align) + ExplicitOffset;
-    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;
-
-    // We're basically throwing away everything passed into us and starting over
-    // to get accurate in-memory offsets. The "PartOffset" is completely useless
-    // to us as computed in Ins.
-    //
-    // We also need to figure out what type legalization is trying to do to get
-    // the correct memory offsets.
-
-    SmallVector<EVT, 16> ValueVTs;
-    SmallVector<uint64_t, 16> Offsets;
-    ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
-
-    for (unsigned Value = 0, NumValues = ValueVTs.size();
-         Value != NumValues; ++Value) {
-      uint64_t BasePartOffset = Offsets[Value];
-
-      EVT ArgVT = ValueVTs[Value];
-      EVT MemVT = ArgVT;
-      MVT RegisterVT =
-        getRegisterTypeForCallingConv(Ctx, ArgVT);
-      unsigned NumRegs =
-        getNumRegistersForCallingConv(Ctx, ArgVT);
-
-      if (!Subtarget->isAmdHsaOS() &&
-          (ArgVT == MVT::i16 || ArgVT == MVT::i8 || ArgVT == MVT::f16)) {
-        // The ABI says the caller will extend these values to 32-bits.
-        MemVT = ArgVT.isInteger() ? MVT::i32 : MVT::f32;
-      } else if (NumRegs == 1) {
-        // This argument is not split, so the IR type is the memory type.
-        if (ArgVT.isExtended()) {
-          // We have an extended type, like i24, so we should just use the
-          // register type.
-          MemVT = RegisterVT;
-        } else {
-          MemVT = ArgVT;
-        }
-      } else if (ArgVT.isVector() && RegisterVT.isVector() &&
-                 ArgVT.getScalarType() == RegisterVT.getScalarType()) {
-        assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
-        // We have a vector value which has been split into a vector with
-        // the same scalar type, but fewer elements. This should handle
-        // all the floating-point vector types.
-        MemVT = RegisterVT;
-      } else if (ArgVT.isVector() &&
-                 ArgVT.getVectorNumElements() == NumRegs) {
-        // This arg has been split so that each element is stored in a separate
-        // register.
-        MemVT = ArgVT.getScalarType();
-      } else if (ArgVT.isExtended()) {
-        // We have an extended type, like i65.
-        MemVT = RegisterVT;
-      } else {
-        unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
-        assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
-        if (RegisterVT.isInteger()) {
-          MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
-        } else if (RegisterVT.isVector()) {
-          assert(!RegisterVT.getScalarType().isFloatingPoint());
-          unsigned NumElements = RegisterVT.getVectorNumElements();
-          assert(MemoryBits % NumElements == 0);
-          // This vector type has been split into another vector type with
-          // a different elements size.
-          EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
-                                           MemoryBits / NumElements);
-          MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
-        } else {
-          llvm_unreachable("cannot deduce memory type.");
-        }
-      }
-
-      // Convert one element vectors to scalar.
-      if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
-        MemVT = MemVT.getScalarType();
-
-      if (MemVT.isExtended()) {
-        // This should really only happen if we have vec3 arguments
-        assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
-        MemVT = MemVT.getPow2VectorType(State.getContext());
-      }
-
-      unsigned PartOffset = 0;
-      for (unsigned i = 0; i != NumRegs; ++i) {
-        State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
-                                               BasePartOffset + PartOffset,
-                                               MemVT.getSimpleVT(),
-                                               CCValAssign::Full));
-        PartOffset += MemVT.getStoreSize();
-      }
-    }
-  }
-}
+void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
+                             const SmallVectorImpl<ISD::InputArg> &Ins) const {
+  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+    const ISD::InputArg &In = Ins[i];
+    EVT MemVT;
+
+    unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
+
+    if (!Subtarget->isAmdHsaOS() &&
+        (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16)) {
+      // The ABI says the caller will extend these values to 32-bits.
+      MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
+    } else if (NumRegs == 1) {
+      // This argument is not split, so the IR type is the memory type.
+      assert(!In.Flags.isSplit());
+      if (In.ArgVT.isExtended()) {
+        // We have an extended type, like i24, so we should just use the register type
+        MemVT = In.VT;
+      } else {
+        MemVT = In.ArgVT;
+      }
+    } else if (In.ArgVT.isVector() && In.VT.isVector() &&
+               In.ArgVT.getScalarType() == In.VT.getScalarType()) {
+      assert(In.ArgVT.getVectorNumElements() > In.VT.getVectorNumElements());
+      // We have a vector value which has been split into a vector with
+      // the same scalar type, but fewer elements. This should handle
+      // all the floating-point vector types.
+      MemVT = In.VT;
+    } else if (In.ArgVT.isVector() &&
+               In.ArgVT.getVectorNumElements() == NumRegs) {
+      // This arg has been split so that each element is stored in a separate
+      // register.
+      MemVT = In.ArgVT.getScalarType();
+    } else if (In.ArgVT.isExtended()) {
+      // We have an extended type, like i65.
+      MemVT = In.VT;
+    } else {
+      unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
+      assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
+      if (In.VT.isInteger()) {
+        MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
+      } else if (In.VT.isVector()) {
+        assert(!In.VT.getScalarType().isFloatingPoint());
+        unsigned NumElements = In.VT.getVectorNumElements();
+        assert(MemoryBits % NumElements == 0);
+        // This vector type has been split into another vector type with
+        // a different elements size.
+        EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
+                                         MemoryBits / NumElements);
+        MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
+      } else {
+        llvm_unreachable("cannot deduce memory type.");
+      }
+    }
+
+    // Convert one element vectors to scalar.
+    if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
+      MemVT = MemVT.getScalarType();
+
+    if (MemVT.isExtended()) {
+      // This should really only happen if we have vec3 arguments
+      assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+      MemVT = MemVT.getPow2VectorType(State.getContext());
+    }
+
+    assert(MemVT.isSimple());
+    allocateKernArg(i, In.VT, MemVT.getSimpleVT(), CCValAssign::Full, In.Flags,
+                    State);
+  }
+}
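For reference, the revert goes back to deriving each kernel argument's in-memory offset through MFI->allocateKernArg(LocVT.getStoreSize(), ArgFlags.getOrigAlign()) instead of recomputing offsets from the IR types with ComputeValueVTs. The following is a standalone sketch of that bump allocation, not the LLVM implementation; the names are my own, it assumes the usual round-up-to-alignment rule, and the real AMDGPUMachineFunction::allocateKernArg additionally tracks the maximum alignment seen.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Sketch of the bookkeeping behind allocateKernArg-style offset assignment:
// each argument gets the next offset rounded up to its alignment, and the
// running offset then advances past the argument's store size.
struct KernArgAllocator {
  uint64_t NextOffset = 0;

  uint64_t allocate(uint64_t Size, uint64_t Align) {
    assert(Align != 0 && (Align & (Align - 1)) == 0 && "expected power-of-two alignment");
    uint64_t Offset = (NextOffset + Align - 1) & ~(Align - 1); // alignTo(NextOffset, Align)
    NextOffset = Offset + Size;
    return Offset;
  }
};

int main() {
  KernArgAllocator A;
  // kernel(i32 %a, i64 %b, i8 %c): the i64 forces 4 bytes of alignment padding.
  std::printf("i32 at offset %llu\n", (unsigned long long)A.allocate(4, 4)); // 0
  std::printf("i64 at offset %llu\n", (unsigned long long)A.allocate(8, 8)); // 8
  std::printf("i8  at offset %llu\n", (unsigned long long)A.allocate(1, 1)); // 16
}
```

In this example the i64 lands at offset 8 rather than 4; padding of that kind is what the reverted change ("Fix handling of alignment padding in DAG argument lowering") was concerned with.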