summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2017-04-24 17:17:36 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2017-04-24 17:17:36 +0000
commit5dea6451389b97186760db15c3b1c3fc033d5ad1 (patch)
tree7ac128d1e56961604bfeff7204b86bf63d967728 /llvm/lib
parenta266923d5749d98464d662ba23149eb082eb01df (diff)
downloadbcm5719-llvm-5dea6451389b97186760db15c3b1c3fc033d5ad1.tar.gz
bcm5719-llvm-5dea6451389b97186760db15c3b1c3fc033d5ad1.zip
AMDGPU: Move v_readlane lane select from VGPR to SGPR
Summary: Fix a compiler bug when the lane select happens to end up in a VGPR. Clarify the semantic of the corresponding intrinsic to be that of the corresponding GLSL: the lane select must be uniform across a wave front, otherwise results are undefined. Reviewers: arsenm Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D32343 llvm-svn: 301197
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp13
1 files changed, 13 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d51110bcbd6..c5af8a1ad92 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2640,6 +2640,19 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
return;
+ // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
+ // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
+ // select is uniform.
+ if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
+ RI.isVGPR(MRI, Src1.getReg())) {
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src1);
+ Src1.ChangeToRegister(Reg, false);
+ return;
+ }
+
// We do not use commuteInstruction here because it is too aggressive and will
// commute if it is possible. We only want to commute here if it improves
// legality. This can be called a fairly large number of times so don't waste
OpenPOWER on IntegriCloud