diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-11 20:59:54 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-04-11 20:59:54 +0000 |
commit | e1f030ca66cc64535205e38a84ffabfbd230abbf (patch) | |
tree | d0f2e98b20f3ebcc2acebcbffafccb708cca8718 /llvm | |
parent | ef309f432638957a6d61ff45ee982e05797addf2 (diff) | |
download | bcm5719-llvm-e1f030ca66cc64535205e38a84ffabfbd230abbf.tar.gz bcm5719-llvm-e1f030ca66cc64535205e38a84ffabfbd230abbf.zip |
R600: Check if a sextload should be used for parameter loads.
Through some oddity where truncate (sextload x) isn't folded into
an anyextload for vectors, the sextload remains if the
vector isn't immediately scalarized. This keeps the expected
zextload instructions in the kernel-args test when small type
vectors aren't scalarized.
llvm-svn: 206070
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/R600/R600ISelLowering.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.h | 2 |
3 files changed, 20 insertions, 14 deletions
diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp index 1e6582296ac..be810851a17 100644 --- a/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -1394,7 +1394,12 @@ SDValue R600TargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, + + // FIXME: This should really check the extload type, but the handling of + // extload vector parameters seems to be broken. + //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + ISD::LoadExtType Ext = ISD::SEXTLOAD; + SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain, DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, 4); diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index b9295ff466c..d08156f7f98 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -224,7 +224,7 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, SDValue Chain, - unsigned Offset) const { + unsigned Offset, bool Signed) const { MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_ADDRESS); @@ -232,7 +232,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, MVT::i64)); - return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr, + return DAG.getExtLoad(Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr, MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, MemVT.getSizeInBits() >> 3); @@ -340,7 +340,8 @@ SDValue SITargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), - 36 + VA.getLocMemOffset()); + 36 + VA.getLocMemOffset(), + Ins[i].Flags.isSExt()); InVals.push_back(Arg); continue; } @@ -533,23 +534,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (IntrinsicID) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case Intrinsic::r600_read_ngroups_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false); case Intrinsic::r600_read_ngroups_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false); case Intrinsic::r600_read_ngroups_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false); case Intrinsic::r600_read_global_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false); case Intrinsic::r600_read_global_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false); case Intrinsic::r600_read_global_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false); case Intrinsic::r600_read_local_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false); case Intrinsic::r600_read_local_size_y: - return LowerParameter(DAG, 
VT, VT, DL, DAG.getEntryNode(), 28); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false); case Intrinsic::r600_read_local_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT); diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h index ca73f53f222..f3a52cb7f96 100644 --- a/llvm/lib/Target/R600/SIISelLowering.h +++ b/llvm/lib/Target/R600/SIISelLowering.h @@ -22,7 +22,7 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, - SDValue Chain, unsigned Offset) const; + SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |