summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-04-16 01:46:49 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-04-16 01:46:49 +0000
commit: b6be2027794fe03afa665c7c9ae727d032fafa95 (patch)
tree: b43a7e26199e324b1b53954a3eb0f157bf8ed0a6 /llvm/lib/Target
parent: 190b232d73761ce785f7b3465fd5b3baf4095200 (diff)
download: bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.tar.gz
download: bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.zip
AMDGPU: Use s_addk_i32 / s_mulk_i32
llvm-svn: 266506
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp 57
1 files changed, 45 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index b48b9ad75ec..ad3f63d2cea 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -198,6 +198,10 @@ static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) {
Orig.isInternalRead());
}
+static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+ return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -214,18 +218,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
- if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
- const MachineOperand &Src = MI.getOperand(1);
-
- if (Src.isImm()) {
- if (isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4))
- MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
- }
-
- continue;
- }
-
if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
// If this has a literal constant source that is the same as the
// reversed bits of an inline immediate, replace with a bitreverse of
@@ -250,6 +242,47 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // FIXME: We also need to consider movs of constant operands since
+ // immediate operands are not folded if they have more than one use, and
+ // the operand folding pass is unaware if the immediate will be free since
+ // it won't know if the src == dest constraint will end up being
+ // satisfied.
+ if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
+ MI.getOpcode() == AMDGPU::S_MUL_I32) {
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src0 = MI.getOperand(1);
+ const MachineOperand &Src1 = MI.getOperand(2);
+
+ // FIXME: This could work better if hints worked with subregisters. If
+ // we have a vector add of a constant, we usually don't get the correct
+ // allocation due to the subregister usage.
+ if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) &&
+ Src0.isReg()) {
+ MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg());
+ continue;
+ }
+
+ if (Src0.isReg() && Src0.getReg() == Dest.getReg()) {
+ if (Src1.isImm() && isKImmOperand(TII, Src1)) {
+ unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
+ AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
+
+ MI.setDesc(TII->get(Opc));
+ MI.tieOperands(0, 1);
+ }
+ }
+ }
+
+ // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
+ if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Src.isImm() && isKImmOperand(TII, Src))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
OpenPOWER on IntegriCloud