Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 126 ++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3f64cf84c69..b55dee68d51 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -304,6 +304,126 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
return;
}
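+// Evaluate a two-operand bitwise op over 32-bit immediates. Returns true and
+// sets Result when Opcode is one of the handled and/or/xor opcodes.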
+static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
+ int32_t LHS, int32_t RHS) {
+ switch (Opcode) {
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::S_AND_B32:
+ Result = LHS & RHS;
+ return true;
+ case AMDGPU::V_OR_B32_e64:
+ case AMDGPU::S_OR_B32:
+ Result = LHS | RHS;
+ return true;
+ case AMDGPU::V_XOR_B32_e64:
+ case AMDGPU::S_XOR_B32:
+ Result = LHS ^ RHS;
+ return true;
+ default:
+ return false;
+ }
+}
+
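+// Select the scalar or vector 32-bit move opcode for a materialized constant.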
+static unsigned getMovOpc(bool IsScalar) {
+ return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+}
+
+// Try to simplify operations with a constant that may appear after instruction
+// selection.
+static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII,
+ MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+
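+ // A not of an immediate becomes a move of the complemented immediate.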
+ if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
+ Opc == AMDGPU::S_NOT_B32) {
+ MachineOperand &Src0 = MI->getOperand(1);
+ if (Src0.isImm()) {
+ Src0.setImm(~Src0.getImm());
+ MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
+ return true;
+ }
+
+ return false;
+ }
+
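+ // The remaining folds all target commutable bitwise ops (and/or/xor).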
+ if (!MI->isCommutable())
+ return false;
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+
+ MachineOperand *Src0 = &MI->getOperand(Src0Idx);
+ MachineOperand *Src1 = &MI->getOperand(Src1Idx);
+ if (!Src0->isImm() && !Src1->isImm())
+ return false;
+
+ // and k0, k1 -> v_mov_b32 (k0 & k1)
+ // or k0, k1 -> v_mov_b32 (k0 | k1)
+ // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
+ if (Src0->isImm() && Src1->isImm()) {
+ int32_t NewImm;
+ if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
+ return false;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
+
+ Src0->setImm(NewImm);
+ MI->RemoveOperand(Src1Idx);
+ MI->setDesc(TII->get(getMovOpc(IsSGPR)));
+ return true;
+ }
+
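+ // Canonicalize a lone immediate into Src1 so the identity checks below
+ // only have to look at one operand.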
+ if (Src0->isImm() && !Src1->isImm()) {
+ std::swap(Src0, Src1);
+ std::swap(Src0Idx, Src1Idx);
+ }
+
+ int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
+ if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) {
+ if (Src1Val == 0) {
+ // y = or x, 0 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ } else if (Src1Val == -1) {
+ // y = or x, -1 => y = v_mov_b32 -1
+ MI->RemoveOperand(Src1Idx);
+ MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
+ } else
+ return false;
+
+ return true;
+ }
+
+ if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::S_AND_B32) {
+ if (Src1Val == 0) {
+ // y = and x, 0 => y = v_mov_b32 0
+ MI->RemoveOperand(Src0Idx);
+ MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
+ } else if (Src1Val == -1) {
+ // y = and x, -1 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ } else
+ return false;
+
+ return true;
+ }
+
+ if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::S_XOR_B32) {
+ if (Src1Val == 0) {
+ // y = xor x, 0 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -389,6 +509,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
+
+ // Folding the immediate may reveal operations that can be constant
+ // folded or replaced with a copy. This can happen, for example, after
+ // frame indices are lowered to constants or after 64-bit constants are
+ // split.
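+ // For instance, "y = V_OR_B32_e64 x, 0" then simplifies to "y = COPY x".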
+ tryConstantFoldOp(MRI, TII, Fold.UseMI);
}
}
}
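Below is a minimal standalone sketch (not part of the commit) of the identity
folds implemented above. The BitOp and FoldKind enums and classifyIdentityFold
are hypothetical stand-ins for the AMDGPU opcode checks and the
COPY/V_MOV_B32 rewrites, not LLVM API:

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for the opcode checks in tryConstantFoldOp.
enum class BitOp { And, Or, Xor };

// How the instruction simplifies once src1 is a known immediate.
enum class FoldKind { None, CopySrc0, MovZero, MovAllOnes };

// Mirror the Src1Val checks above: or/and/xor against 0 or -1 either
// degenerates to a copy of src0 or to a move of a known constant.
static FoldKind classifyIdentityFold(BitOp Op, int32_t Src1Val) {
  switch (Op) {
  case BitOp::Or:
    if (Src1Val == 0)  return FoldKind::CopySrc0;   // y = or x, 0  => copy x
    if (Src1Val == -1) return FoldKind::MovAllOnes; // y = or x, -1 => mov -1
    break;
  case BitOp::And:
    if (Src1Val == 0)  return FoldKind::MovZero;    // y = and x, 0  => mov 0
    if (Src1Val == -1) return FoldKind::CopySrc0;   // y = and x, -1 => copy x
    break;
  case BitOp::Xor:
    if (Src1Val == 0)  return FoldKind::CopySrc0;   // y = xor x, 0 => copy x
    break;
  }
  return FoldKind::None;
}

int main() {
  assert(classifyIdentityFold(BitOp::And, -1) == FoldKind::CopySrc0);
  assert(classifyIdentityFold(BitOp::Or,  -1) == FoldKind::MovAllOnes);
  assert(classifyIdentityFold(BitOp::Xor,  7) == FoldKind::None);
  return 0;
}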