diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 3f64cf84c69..b55dee68d51 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -304,6 +304,126 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, return; } +static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, + int32_t LHS, int32_t RHS) { + switch (Opcode) { + case AMDGPU::V_AND_B32_e64: + case AMDGPU::S_AND_B32: + Result = LHS & RHS; + return true; + case AMDGPU::V_OR_B32_e64: + case AMDGPU::S_OR_B32: + Result = LHS | RHS; + return true; + case AMDGPU::V_XOR_B32_e64: + case AMDGPU::S_XOR_B32: + Result = LHS ^ RHS; + return true; + default: + return false; + } +} + +static unsigned getMovOpc(bool IsScalar) { + return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; +} + +// Try to simplify operations with a constant that may appear after instruction +// selection. +static bool tryConstantFoldOp(MachineRegisterInfo &MRI, + const SIInstrInfo *TII, + MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + + if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || + Opc == AMDGPU::S_NOT_B32) { + MachineOperand &Src0 = MI->getOperand(1); + if (Src0.isImm()) { + Src0.setImm(~Src0.getImm()); + MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); + return true; + } + + return false; + } + + if (!MI->isCommutable()) + return false; + + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); + + MachineOperand *Src0 = &MI->getOperand(Src0Idx); + MachineOperand *Src1 = &MI->getOperand(Src1Idx); + if (!Src0->isImm() && !Src1->isImm()) + return false; + + // and k0, k1 -> v_mov_b32 (k0 & k1) + // or k0, k1 -> v_mov_b32 (k0 | k1) + // xor k0, k1 -> v_mov_b32 (k0 ^ k1) + if (Src0->isImm() && Src1->isImm()) { + int32_t NewImm; + if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) + return false; + + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg()); + + Src0->setImm(NewImm); + MI->RemoveOperand(Src1Idx); + MI->setDesc(TII->get(getMovOpc(IsSGPR))); + return true; + } + + if (Src0->isImm() && !Src1->isImm()) { + std::swap(Src0, Src1); + std::swap(Src0Idx, Src1Idx); + } + + int32_t Src1Val = static_cast<int32_t>(Src1->getImm()); + if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) { + if (Src1Val == 0) { + // y = or x, 0 => y = copy x + MI->RemoveOperand(Src1Idx); + MI->setDesc(TII->get(AMDGPU::COPY)); + } else if (Src1Val == -1) { + // y = or x, -1 => y = v_mov_b32 -1 + MI->RemoveOperand(Src1Idx); + MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32))); + } else + return false; + + return true; + } + + if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 || + MI->getOpcode() == AMDGPU::S_AND_B32) { + if (Src1Val == 0) { + // y = and x, 0 => y = v_mov_b32 0 + MI->RemoveOperand(Src0Idx); + MI->setDesc(TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32))); + } else if (Src1Val == -1) { + // y = and x, -1 => y = copy x + MI->RemoveOperand(Src1Idx); + MI->setDesc(TII->get(AMDGPU::COPY)); + } else + return false; + + return true; + } + + if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 || + MI->getOpcode() == AMDGPU::S_XOR_B32) { + if (Src1Val == 0) { + // y = xor x, 0 => y = copy x + MI->RemoveOperand(Src1Idx); + MI->setDesc(TII->get(AMDGPU::COPY)); + } + } + + return false; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; @@ -389,6 +509,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << Fold.UseOpNo << " of " << *Fold.UseMI << '\n'); + + // Folding the immediate may reveal operations that can be constant + // folded or replaced with a copy. This can happen for example after + // frame indices are lowered to constants or from splitting 64-bit + // constants. + tryConstantFoldOp(MRI, TII, Fold.UseMI); } } } |