-rw-r--r--  llvm/include/llvm/CodeGen/FunctionLoweringInfo.h        |  1
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  | 13
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp               | 65
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll               |  7
4 files changed, 54 insertions, 32 deletions
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 5fe4f89f34b..7c658515de0 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -246,6 +246,7 @@ public:
return 0;
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
+ assert(VirtReg2Value.empty());
return R = CreateRegs(V->getType());
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d3c31911d67..fba728625b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -579,9 +579,18 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
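+    // An IR value may be lowered into several consecutive virtual registers;
+    // walk its legalized value types so every one of those registers maps
+    // back to the same Value.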
+ SmallVector<EVT, 4> ValueVTs;
for (auto &P : ValueMap) {
- VirtReg2Value[P.second] = P.first;
+ ValueVTs.clear();
+ ComputeValueVTs(*TLI, Fn->getParent()->getDataLayout(),
+ P.first->getType(), ValueVTs);
+ unsigned Reg = P.second;
+ for (EVT VT : ValueVTs) {
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ VirtReg2Value[Reg++] = P.first;
+ }
}
}
- return VirtReg2Value[Vreg];
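+  // Use lookup() so an unrecorded register yields a null Value instead of
+  // default-inserting an entry into the map.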
+ return VirtReg2Value.lookup(Vreg);
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9f5198042e4..dbcd1bf0c76 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9303,42 +9303,49 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
}
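+// Only referenced from an assert below; LLVM_ATTRIBUTE_UNUSED keeps release
+// (NDEBUG) builds free of unused-function warnings.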
+LLVM_ATTRIBUTE_UNUSED
+static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
+ assert(N->getOpcode() == ISD::CopyFromReg);
+ do {
+ // Follow the chain until we find an INLINEASM node.
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() == ISD::INLINEASM)
+ return true;
+ } while (N->getOpcode() == ISD::CopyFromReg);
+ return false;
+}
+
bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
FunctionLoweringInfo * FLI, LegacyDivergenceAnalysis * KDA) const
{
switch (N->getOpcode()) {
- case ISD::Register:
case ISD::CopyFromReg:
{
- const RegisterSDNode *R = nullptr;
- if (N->getOpcode() == ISD::Register) {
- R = dyn_cast<RegisterSDNode>(N);
- }
- else {
- R = dyn_cast<RegisterSDNode>(N->getOperand(1));
- }
- if (R)
- {
- const MachineFunction * MF = FLI->MF;
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
- unsigned Reg = R->getReg();
- if (TRI.isPhysicalRegister(Reg))
- return TRI.isVGPR(MRI, Reg);
-
- if (MRI.isLiveIn(Reg)) {
- // workitem.id.x workitem.id.y workitem.id.z
- // Any VGPR formal argument is also considered divergent
- if (TRI.isVGPR(MRI, Reg))
- return true;
- // Formal arguments of non-entry functions
- // are conservatively considered divergent
- else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
- return true;
- }
- return !KDA || KDA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+ const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
+ const MachineFunction * MF = FLI->MF;
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+ unsigned Reg = R->getReg();
+ if (TRI.isPhysicalRegister(Reg))
+ return !TRI.isSGPRReg(MRI, Reg);
+
+ if (MRI.isLiveIn(Reg)) {
+ // workitem.id.x workitem.id.y workitem.id.z
+ // Any VGPR formal argument is also considered divergent
+ if (!TRI.isSGPRReg(MRI, Reg))
+ return true;
+ // Formal arguments of non-entry functions
+ // are conservatively considered divergent
+ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+ return true;
+ return false;
}
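+      // If the virtual register maps back to an IR value, ask the divergence
+      // analysis; registers without one (the return-value demote register or
+      // inline-asm outputs) fall back to the register class check.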
+ const Value *V = FLI->getValueFromVirtualReg(Reg);
+ if (V)
+ return KDA->isDivergent(V);
+ assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
+ return !TRI.isSGPRReg(MRI, Reg);
}
break;
case ISD::LOAD: {
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index 3c92e8e5cba..6beac4904a7 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -501,7 +501,12 @@ entry:
; GCN-LABEL: {{^}}sdwa_crash_inlineasm_def:
; GCN: s_mov_b32 s{{[0-9]+}}, 0xffff
; GCN: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
+;
+; TODO: Why is the constant not peepholed into the v_or_b32_e32?
+;
+; NOSDWA: s_mov_b32 [[CONST:s[0-9]+]], 0x10000
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, [[CONST]],
+; SDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
bb:
br label %bb1