Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp')
 llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index a7e12996e5f..b48b9ad75ec 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -226,6 +226,30 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
+      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
+        // If this has a literal constant source that is the same as the
+        // reversed bits of an inline immediate, replace it with a bitreverse
+        // of that constant. This saves 4 bytes in the common case of
+        // materializing sign bits.
+
+        // Only do this after regalloc (approximated by the physical register
+        // check below), since it would confuse earlier optimizations.
+        // XXX - not exactly a check for post-regalloc run.
+        MachineOperand &Src = MI.getOperand(1);
+        if (Src.isImm() &&
+            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
+          int64_t Imm = Src.getImm();
+          if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
+            int32_t ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Imm));
+            if (ReverseImm >= -16 && ReverseImm <= 64) {
+              MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+              Src.setImm(ReverseImm);
+              continue;
+            }
+          }
+        }
+      }
+
       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
         continue;
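
To illustrate which literals the new check catches, here is a minimal standalone C++ sketch of the same test. It is illustrative only: reverseBits32 is a stand-in for llvm::reverseBits, the sample immediates are hypothetical, and the pass additionally requires (via TII->isInlineConstant above) that the original immediate is not already an inline constant.

#include <cstdint>
#include <cstdio>

// Stand-in for llvm::reverseBits<int32_t>: reverse the 32 bits of the
// immediate. Bit reversal is its own inverse, so V_BFREV_B32 applying
// the same reversal at runtime recovers the original constant.
static int32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I) {
    R = (R << 1) | (V & 1u);
    V >>= 1;
  }
  return static_cast<int32_t>(R);
}

int main() {
  // 0x80000000 (just the sign bit) needs a 32-bit literal, so
  // v_mov_b32 encodes as 8 bytes (4-byte opcode + 4-byte literal).
  // Its bit-reverse is 1, an inline immediate, so the pass rewrites it
  // as v_bfrev_b32 with source 1: a 4-byte instruction, same result.
  const uint32_t Imms[] = {0x80000000u, 0xC0000000u, 0xFFFF0000u, 0x12345678u};
  for (uint32_t Imm : Imms) {
    int32_t Rev = reverseBits32(Imm);
    // Integer inline immediates cover [-16, 64], matching the
    // ReverseImm range check in the patch.
    bool Shrinks = Rev >= -16 && Rev <= 64;
    printf("0x%08X -> reversed %d -> %s\n", (unsigned)Imm, (int)Rev,
           Shrinks ? "v_bfrev_b32 (saves 4 bytes)" : "keep v_mov_b32");
  }
  return 0;
}

Of these samples, 0x80000000 and 0xC0000000 reverse to 1 and 3 and shrink; 0xFFFF0000 reverses to 65535 and 0x12345678 to a large value, so both keep the literal mov.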