summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTim Renouf <tpr.llvm@botech.co.uk>2017-10-12 16:16:41 +0000
committerTim Renouf <tpr.llvm@botech.co.uk>2017-10-12 16:16:41 +0000
commitc8ffffe4625bc0b0f790824eaa2a591943dff862 (patch)
treef0c8c34ad492ba96e555fb009245bc688f90fdc4 /llvm/lib
parent3e0199f7ebab06654168e72add49b7f8eed75b27 (diff)
downloadbcm5719-llvm-c8ffffe4625bc0b0f790824eaa2a591943dff862.tar.gz
bcm5719-llvm-c8ffffe4625bc0b0f790824eaa2a591943dff862.zip
[AMDGPU] For amdpal, widen interpolation mode workaround
Summary: The interpolation mode workaround ensures that at least one interpolation mode is enabled in PSInputAddr. It does not also check PSInputEna on the basis that the user might enable bits in that depending on run-time state. However, for amdpal os type, the user does not enable some bits after compilation based on run-time states; the register values being generated here are the final ones set in the hardware. Therefore, apply the workaround to PSInputAddr and PSInputEnable together. (The case where a bit is set in PSInputAddr but not in PSInputEnable is where the frontend set up an input arg for a particular interpolation mode, but nothing uses that input arg. Really we should have an earlier pass that removes such an arg.) Reviewers: arsenm, nhaehnle, dstuttard Subscribers: kzhuravl, wdng, yaxunl, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D37758 llvm-svn: 315591
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp33
1 files changed, 25 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fac2b03fa2f..2bc3d7fa508 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1493,14 +1493,31 @@ SDValue SITargetLowering::LowerFormalArguments(
// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
// enabled too.
- if (CallConv == CallingConv::AMDGPU_PS &&
- ((Info->getPSInputAddr() & 0x7F) == 0 ||
- ((Info->getPSInputAddr() & 0xF) == 0 &&
- Info->isPSInputAllocated(11)))) {
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- Info->markPSInputAllocated(0);
- Info->markPSInputEnabled(0);
+ if (CallConv == CallingConv::AMDGPU_PS) {
+ if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
+ if (Subtarget->isAmdPalOS()) {
+ // For isAmdPalOS, the user does not enable some bits after compilation
+ // based on run-time states; the register values being generated here are
+ // the final ones set in hardware. Therefore we need to apply the
+ // workaround to PSInputAddr and PSInputEnable together. (The case where
+ // a bit is set in PSInputAddr but not PSInputEnable is where the
+ // frontend set up an input arg for a particular interpolation mode, but
+ // nothing uses that input arg. Really we should have an earlier pass
+ // that removes such an arg.)
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+ if ((PsInputBits & 0x7F) == 0 ||
+ ((PsInputBits & 0xF) == 0 &&
+ (PsInputBits >> 11 & 1)))
+ Info->markPSInputEnabled(
+ countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
+ }
}
assert(!Info->hasDispatchPtr() &&
OpenPOWER on IntegriCloud