summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-10 00:39:12 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-10 00:39:12 +0000
commit4bd72361935300f1699d3e49ba3304f0c7631f1a (patch)
tree2d92b1bd6dc4e80a7d6622d1efbb91d843b62a43 /llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
parent86581e496b07cd975b8df2e59734b1d5341f0fa3 (diff)
downloadbcm5719-llvm-4bd72361935300f1699d3e49ba3304f0c7631f1a.tar.gz
bcm5719-llvm-4bd72361935300f1699d3e49ba3304f0c7631f1a.zip
AMDGPU: Fix handling of 16-bit immediates
Since 32-bit instructions with 32-bit input immediate behavior are used to materialize 16-bit constants in 32-bit registers for 16-bit instructions, determining the legality based on the size is incorrect. Change operands to have the size specified in the type. Also adds a workaround for a disassembler bug that produces an immediate MCOperand for an operand that is supposed to be OPERAND_REGISTER. The assembler appears to accept out of bounds immediates and truncates them, but this seems to be an issue for 32-bit already. llvm-svn: 289306
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp52
1 files changed, 41 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 29cac2fbf6d..85cbadf0a57 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -329,25 +329,29 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
- OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ return true;
+ default:
+ return false;
+ }
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
// Avoid using MCRegisterClass::getSize, since that function will go away
@@ -413,6 +417,15 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
if (Literal >= -16 && Literal <= 64)
return true;
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
uint32_t Val = static_cast<uint32_t>(Literal);
return (Val == FloatToBits(0.0f)) ||
(Val == FloatToBits(1.0f)) ||
@@ -426,6 +439,23 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
(Val == 0x3e22f983 && HasInv2Pi);
}
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3C00 || // 1.0
+ Val == 0xBC00 || // -1.0
+ Val == 0x3800 || // 0.5
+ Val == 0xB800 || // -0.5
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4400 || // 4.0
+ Val == 0xC400 || // -4.0
+ Val == 0x3118; // 1/2pi
+}
} // End namespace AMDGPU
} // End namespace llvm
OpenPOWER on IntegriCloud