diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 01:46:49 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 01:46:49 +0000 |
commit | b6be2027794fe03afa665c7c9ae727d032fafa95 (patch) | |
tree | b43a7e26199e324b1b53954a3eb0f157bf8ed0a6 /llvm/lib/Target | |
parent | 190b232d73761ce785f7b3465fd5b3baf4095200 (diff) | |
download | bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.tar.gz bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.zip |
AMDGPU: Use s_addk_i32 / s_mulk_i32
llvm-svn: 266506
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 57 |
1 files changed, 45 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index b48b9ad75ec..ad3f63d2cea 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -198,6 +198,10 @@ static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) { Orig.isInternalRead()); } +static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { + return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4); +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -214,18 +218,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - // Try to use S_MOVK_I32, which will save 4 bytes for small immediates. - if (MI.getOpcode() == AMDGPU::S_MOV_B32) { - const MachineOperand &Src = MI.getOperand(1); - - if (Src.isImm()) { - if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4)) - MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); - } - - continue; - } - if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) { // If this has a literal constant source that is the same as the // reversed bits of an inline immediate, replace with a bitreverse of @@ -250,6 +242,47 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { } } + // FIXME: We also need to consider movs of constant operands since + // immediate operands are not folded if they have more than one use, and + // the operand folding pass is unaware if the immediate will be free since + // it won't know if the src == dest constraint will end up being + // satisfied. + if (MI.getOpcode() == AMDGPU::S_ADD_I32 || + MI.getOpcode() == AMDGPU::S_MUL_I32) { + const MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Src0 = MI.getOperand(1); + const MachineOperand &Src1 = MI.getOperand(2); + + // FIXME: This could work better if hints worked with subregisters. If + // we have a vector add of a constant, we usually don't get the correct + // allocation due to the subregister usage. + if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) && + Src0.isReg()) { + MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg()); + continue; + } + + if (Src0.isReg() && Src0.getReg() == Dest.getReg()) { + if (Src1.isImm() && isKImmOperand(TII, Src1)) { + unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ? + AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32; + + MI.setDesc(TII->get(Opc)); + MI.tieOperands(0, 1); + } + } + } + + // Try to use S_MOVK_I32, which will save 4 bytes for small immediates. + if (MI.getOpcode() == AMDGPU::S_MOV_B32) { + const MachineOperand &Src = MI.getOperand(1); + + if (Src.isImm() && isKImmOperand(TII, Src)) + MI.setDesc(TII->get(AMDGPU::S_MOVK_I32)); + + continue; + } + if (!TII->hasVALU32BitEncoding(MI.getOpcode())) continue; |