path: root/llvm/lib/Target/AMDGPU
author     Matt Arsenault <Matthew.Arsenault@amd.com>   2016-12-10 00:39:12 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2016-12-10 00:39:12 +0000
commit     4bd72361935300f1699d3e49ba3304f0c7631f1a (patch)
tree       2d92b1bd6dc4e80a7d6622d1efbb91d843b62a43 /llvm/lib/Target/AMDGPU
parent     86581e496b07cd975b8df2e59734b1d5341f0fa3 (diff)
AMDGPU: Fix handling of 16-bit immediates
Since 32-bit instructions with 32-bit input immediate behavior are used to
materialize 16-bit constants in 32-bit registers for 16-bit instructions,
determining the legality based on the size is incorrect. Change operands to
have the size specified in the type.

Also adds a workaround for a disassembler bug that produces an immediate
MCOperand for an operand that is supposed to be OPERAND_REGISTER.

The assembler appears to accept out of bounds immediates and truncates them,
but this seems to be an issue for 32-bit already.

llvm-svn: 289306
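For reference, below is a minimal standalone sketch of the 16-bit inline-immediate
legality check this patch introduces (mirroring the new
AMDGPU::isInlinableLiteral16 added to AMDGPUBaseInfo.cpp). It is an illustration
only, not the in-tree implementation: it gates the 1/(2*pi) value on the
inv-2pi feature flag, whereas the patch asserts the feature instead, and the
helper/main names are invented for the example.

#include <cstdint>
#include <cstdio>

// Sketch: can a 16-bit literal be encoded as an AMDGPU inline immediate?
static bool isInlinableLiteral16Sketch(int16_t Literal, bool HasInv2Pi) {
  // Small signed integers in [-16, 64] always have inline encodings.
  if (Literal >= -16 && Literal <= 64)
    return true;

  // Otherwise the raw bits must match one of the half-precision constants
  // the hardware can encode directly (same table as getLit16Encoding below).
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 ||               // 1.0
         Val == 0xBC00 ||               // -1.0
         Val == 0x3800 ||               // 0.5
         Val == 0xB800 ||               // -0.5
         Val == 0x4000 ||               // 2.0
         Val == 0xC000 ||               // -2.0
         Val == 0x4400 ||               // 4.0
         Val == 0xC400 ||               // -4.0
         (Val == 0x3118 && HasInv2Pi);  // 1/(2*pi), VI-only inline constant
}

int main() {
  // 64 is in the integer inline range, 0x3C00 is fp16 1.0, 0x1234 is neither.
  std::printf("%d %d %d\n",
              isInlinableLiteral16Sketch(64, true),
              isInlinableLiteral16Sketch(static_cast<int16_t>(0x3C00), true),
              isInlinableLiteral16Sketch(0x1234, true));
  return 0;
}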
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp         205
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp   144
-rw-r--r--  llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h       4
-rw-r--r--  llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp      83
-rw-r--r--  llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h         2
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp       63
-rw-r--r--  llvm/lib/Target/AMDGPU/SIDefines.h                             29
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp                      14
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                        118
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h                           89
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                          48
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td                        5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp                      13
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.h                        12
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td                       22
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp                19
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp                52
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h                  35
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP2Instructions.td                      6
19 files changed, 741 insertions, 222 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 453d0d91d3e..80c815e830b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -215,6 +215,10 @@ public:
return isRegKind() || isInlinableImm(type);
}
+ bool isRegOrImmWithInt16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i16);
+ }
+
bool isRegOrImmWithInt32InputMods() const {
return isRegOrImmWithInputMods(MVT::i32);
}
@@ -223,6 +227,10 @@ public:
return isRegOrImmWithInputMods(MVT::i64);
}
+ bool isRegOrImmWithFP16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f16);
+ }
+
bool isRegOrImmWithFP32InputMods() const {
return isRegOrImmWithInputMods(MVT::f32);
}
@@ -282,6 +290,10 @@ public:
bool isRegClass(unsigned RCID) const;
+ bool isSCSrcB16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i16);
+ }
+
bool isSCSrcB32() const {
return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i32);
}
@@ -290,6 +302,10 @@ public:
return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64);
}
+ bool isSCSrcF16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f16);
+ }
+
bool isSCSrcF32() const {
return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32);
}
@@ -302,6 +318,10 @@ public:
return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
}
+ bool isSSrcB16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::i16);
+ }
+
bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
@@ -316,6 +336,10 @@ public:
return isSCSrcB64() || isLiteralImm(MVT::f64);
}
+ bool isSSrcF16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::f16);
+ }
+
bool isVCSrcB32() const {
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32);
}
@@ -324,6 +348,10 @@ public:
return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64);
}
+ bool isVCSrcB16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i16);
+ }
+
bool isVCSrcF32() const {
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32);
}
@@ -332,6 +360,10 @@ public:
return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64);
}
+ bool isVCSrcF16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f16);
+ }
+
bool isVSrcB32() const {
return isVCSrcF32() || isLiteralImm(MVT::i32);
}
@@ -340,6 +372,10 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::i64);
}
+ bool isVSrcB16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::i16);
+ }
+
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32);
}
@@ -348,10 +384,18 @@ public:
return isVCSrcF64() || isLiteralImm(MVT::f64);
}
+ bool isVSrcF16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::f16);
+ }
+
bool isKImmFP32() const {
return isLiteralImm(MVT::f32);
}
+ bool isKImmFP16() const {
+ return isLiteralImm(MVT::f16);
+ }
+
bool isMem() const override {
return false;
}
@@ -439,7 +483,16 @@ public:
void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
- void addKImmFP32Operands(MCInst &Inst, unsigned N) const;
+ template <unsigned Bitwidth>
+ void addKImmFPOperands(MCInst &Inst, unsigned N) const;
+
+ void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<16>(Inst, N);
+ }
+
+ void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<32>(Inst, N);
+ }
void addRegOperands(MCInst &Inst, unsigned N) const;
@@ -826,19 +879,23 @@ struct OptionalOperand {
} // end anonymous namespace
// May be called with integer type with equivalent bitwidth.
-static const fltSemantics *getFltSemantics(MVT VT) {
- switch (VT.getSizeInBits()) {
- case 32:
+static const fltSemantics *getFltSemantics(unsigned Size) {
+ switch (Size) {
+ case 4:
return &APFloat::IEEEsingle;
- case 64:
+ case 8:
return &APFloat::IEEEdouble;
- case 16:
+ case 2:
return &APFloat::IEEEhalf;
default:
llvm_unreachable("unsupported fp type");
}
}
+static const fltSemantics *getFltSemantics(MVT VT) {
+ return getFltSemantics(VT.getSizeInBits() / 8);
+}
+
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
@@ -895,6 +952,12 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
AsmParser->hasInv2PiInlineImm());
}
+ if (type.getScalarSizeInBits() == 16) {
+ return AMDGPU::isInlinableLiteral16(
+ static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
return AMDGPU::isInlinableLiteral32(
static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
AsmParser->hasInv2PiInlineImm());
@@ -909,9 +972,13 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
if (!Imm.IsFPImm) {
// We got int literal token.
+ unsigned Size = type.getSizeInBits();
+ if (Size == 64)
+ Size = 32;
+
// FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
// types.
- return isUInt<32>(Imm.Val) || isInt<32>(Imm.Val);
+ return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
}
// We got fp literal token
@@ -947,7 +1014,8 @@ void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers
}
}
- if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) {
+ if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
+ Inst.getNumOperands())) {
addLiteralImmOperand(Inst, Val);
} else {
Inst.addOperand(MCOperand::createImm(Val));
@@ -960,69 +1028,112 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
- APInt Literal(64, Val);
- auto OpSize = AMDGPU::getRegOperandSize(AsmParser->getMRI(), InstDesc, OpNum); // expected operand size
+ auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
if (Imm.IsFPImm) { // We got fp literal token
- if (OpSize == 8) { // Expected 64-bit operand
- // Check if literal is inlinable
+ APInt Literal(64, Val);
+
+ switch (OpSize) {
+ case 8: {
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
- } else if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
+ return;
+ }
+
+ // Non-inlineable
+ if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
// For fp operands we check if low 32 bits are zeros
if (Literal.getLoBits(32) != 0) {
const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
- "Can't encode literal as exact 64-bit"
- " floating-point operand. Low 32-bits will be"
- " set to zero");
+ "Can't encode literal as exact 64-bit floating-point operand. "
+ "Low 32-bits will be set to zero");
}
+
Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
- } else {
- // We don't allow fp literals in 64-bit integer instructions. It is
- // unclear how we should encode them. This case should be checked earlier
- // in predicate methods (isLiteralImm())
- llvm_unreachable("fp literal in 64-bit integer instruction.");
+ return;
}
- } else { // Expected 32-bit operand
+
+ // We don't allow fp literals in 64-bit integer instructions. It is
+ // unclear how we should encode them. This case should be checked earlier
+ // in predicate methods (isLiteralImm())
+ llvm_unreachable("fp literal in 64-bit integer instruction.");
+ }
+ case 4:
+ case 2: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble, Literal);
// Convert literal to single precision
- FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+ FPLiteral.convert(*getFltSemantics(OpSize),
+ APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+ return;
}
- } else { // We got int literal token
- if (OpSize == 8) { // Expected 64-bit operand
- auto LiteralVal = Literal.getZExtValue();
- if (AMDGPU::isInlinableLiteral64(LiteralVal,
- AsmParser->hasInv2PiInlineImm())) {
- Inst.addOperand(MCOperand::createImm(LiteralVal));
- return;
- }
- } else { // Expected 32-bit operand
- auto LiteralVal = static_cast<int32_t>(Literal.getLoBits(32).getZExtValue());
- if (AMDGPU::isInlinableLiteral32(LiteralVal,
- AsmParser->hasInv2PiInlineImm())) {
- Inst.addOperand(MCOperand::createImm(LiteralVal));
- return;
- }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+
+ return;
+ }
+
+ // We got int literal token.
+ // Only sign extend inline immediates.
+ // FIXME: No errors on truncation
+ switch (OpSize) {
+ case 4: {
+ if (isInt<32>(Val) &&
+ AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
}
- Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+ return;
+ }
+ case 8: {
+ if (AMDGPU::isInlinableLiteral64(Val,
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ return;
+ }
+ case 2: {
+ if (isInt<16>(Val) &&
+ AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ return;
+ }
+ default:
+ llvm_unreachable("invalid operand size");
}
}
-void AMDGPUOperand::addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+template <unsigned Bitwidth>
+void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
APInt Literal(64, Imm.Val);
- if (Imm.IsFPImm) { // We got fp literal
- bool lost;
- APFloat FPLiteral(APFloat::IEEEdouble, Literal);
- FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
- Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
- } else { // We got int literal token
- Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+
+ if (!Imm.IsFPImm) {
+ // We got int literal token.
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
+ return;
}
+
+ bool Lost;
+ APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+ FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
+ APFloat::rmNearestTiesToEven, &Lost);
+ Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1a8c04b150f..2247cad7bb5 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -88,6 +88,15 @@ DECODE_OPERAND(SReg_128)
DECODE_OPERAND(SReg_256)
DECODE_OPERAND(SReg_512)
+
+static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
+}
+
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
@@ -250,6 +259,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
+ return decodeSrcOp(OPW16, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -324,28 +337,96 @@ MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
// Cast prevents negative overflow.
}
-MCOperand AMDGPUDisassembler::decodeFPImmed(bool Is32, unsigned Imm) {
+static int64_t getInlineImmVal32(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return FloatToBits(0.5f);
+ case 241:
+ return FloatToBits(-0.5f);
+ case 242:
+ return FloatToBits(1.0f);
+ case 243:
+ return FloatToBits(-1.0f);
+ case 244:
+ return FloatToBits(2.0f);
+ case 245:
+ return FloatToBits(-2.0f);
+ case 246:
+ return FloatToBits(4.0f);
+ case 247:
+ return FloatToBits(-4.0f);
+ case 248: // 1 / (2 * PI)
+ return 0x3e22f983;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+static int64_t getInlineImmVal64(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return DoubleToBits(0.5);
+ case 241:
+ return DoubleToBits(-0.5);
+ case 242:
+ return DoubleToBits(1.0);
+ case 243:
+ return DoubleToBits(-1.0);
+ case 244:
+ return DoubleToBits(2.0);
+ case 245:
+ return DoubleToBits(-2.0);
+ case 246:
+ return DoubleToBits(4.0);
+ case 247:
+ return DoubleToBits(-4.0);
+ case 248: // 1 / (2 * PI)
+ return 0x3fc45f306dc9c882;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+static int64_t getInlineImmVal16(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return 0x3800;
+ case 241:
+ return 0xB800;
+ case 242:
+ return 0x3C00;
+ case 243:
+ return 0xBC00;
+ case 244:
+ return 0x4000;
+ case 245:
+ return 0xC000;
+ case 246:
+ return 0x4400;
+ case 247:
+ return 0xC400;
+ case 248: // 1 / (2 * PI)
+ return 0x3118;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
&& Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
+
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
- // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as
- // literal constant.
- float V = 0.0f;
- switch (Imm) {
- case 240: V = 0.5f; break;
- case 241: V = -0.5f; break;
- case 242: V = 1.0f; break;
- case 243: V = -1.0f; break;
- case 244: V = 2.0f; break;
- case 245: V = -2.0f; break;
- case 246: V = 4.0f; break;
- case 247: V = -4.0f; break;
- case 248: return MCOperand::createImm(Is32 ? // 1/(2*PI)
- 0x3e22f983 :
- 0x3fc45f306dc9c882);
- default: break;
+ switch (Width) {
+ case OPW32:
+ return MCOperand::createImm(getInlineImmVal32(Imm));
+ case OPW64:
+ return MCOperand::createImm(getInlineImmVal64(Imm));
+ case OPW16:
+ return MCOperand::createImm(getInlineImmVal16(Imm));
+ default:
+ llvm_unreachable("implement me");
}
- return MCOperand::createImm(Is32? FloatToBits(V) : DoubleToBits(V));
}
unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
@@ -353,7 +434,9 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return VGPR_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
}
@@ -364,7 +447,9 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return SGPR_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
}
@@ -375,7 +460,9 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return TTMP_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
}
@@ -396,19 +483,26 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN);
}
- assert(Width == OPW32 || Width == OPW64);
- const bool Is32 = (Width == OPW32);
+ assert(Width == OPW16 || Width == OPW32 || Width == OPW64);
if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
return decodeIntImmed(Val);
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Is32, Val);
+ return decodeFPImmed(Width, Val);
if (Val == LITERAL_CONST)
return decodeLiteralConstant();
- return Is32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
+ switch (Width) {
+ case OPW32:
+ case OPW16:
+ return decodeSpecialReg32(Val);
+ case OPW64:
+ return decodeSpecialReg64(Val);
+ default:
+ llvm_unreachable("unexpected immediate type");
+ }
}
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index c8b2f1ff239..ee5883a984e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -66,6 +66,7 @@ public:
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
+ MCOperand decodeOperand_VSrc16(unsigned Val) const;
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
@@ -83,6 +84,7 @@ public:
OPW32,
OPW64,
OPW128,
+ OPW16,
OPW_LAST_,
OPW_FIRST_ = OPW32
};
@@ -92,7 +94,7 @@ public:
unsigned getTtmpClassId(const OpWidthTy Width) const;
static MCOperand decodeIntImmed(unsigned Imm);
- static MCOperand decodeFPImmed(bool Is32, unsigned Imm);
+ static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
MCOperand decodeLiteralConstant() const;
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index f95d790e2bc..b84aaaef090 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -47,7 +47,13 @@ void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff);
+ // It's possible to end up with a 32-bit literal used with a 16-bit operand
+ // with ignored high bits. Print as 32-bit anyway in that case.
+ int64_t Imm = MI->getOperand(OpNo).getImm();
+ if (isInt<16>(Imm) || isUInt<16>(Imm))
+ O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
+ else
+ printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
@@ -336,6 +342,38 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, STI, O);
}
+void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
+ return;
+ }
+
+ if (Imm == 0x3C00)
+ O<< "1.0";
+ else if (Imm == 0xBC00)
+ O<< "-1.0";
+ else if (Imm == 0x3800)
+ O<< "0.5";
+ else if (Imm == 0xB800)
+ O<< "-0.5";
+ else if (Imm == 0x4000)
+ O<< "2.0";
+ else if (Imm == 0xC000)
+ O<< "-2.0";
+ else if (Imm == 0x4400)
+ O<< "4.0";
+ else if (Imm == 0xC400)
+ O<< "-4.0";
+ else if (Imm == 0x3118) {
+ assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]);
+ O << "0.15915494";
+ } else
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -431,22 +469,39 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
} else if (Op.isImm()) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int RCID = Desc.OpInfo[OpNo].RegClass;
- if (RCID != -1) {
- unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
- if (RCBits == 32)
- printImmediate32(Op.getImm(), STI, O);
- else if (RCBits == 64)
- printImmediate64(Op.getImm(), STI, O);
- else
- llvm_unreachable("Invalid register class size");
- } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
+ switch (Desc.OpInfo[OpNo].OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case MCOI::OPERAND_IMMEDIATE:
printImmediate32(Op.getImm(), STI, O);
- } else {
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ printImmediate64(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ printImmediate16(Op.getImm(), STI, O);
+ break;
+ case MCOI::OPERAND_UNKNOWN:
+ case MCOI::OPERAND_PCREL:
+ O << formatDec(Op.getImm());
+ break;
+ case MCOI::OPERAND_REGISTER:
+ // FIXME: This should be removed and handled somewhere else. Seems to come
+ // from a disassembler bug.
+ O << "/*invalid immediate*/";
+ break;
+ default:
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
- // TODO: Eventually this should be unnecessary.
- O << formatDec(Op.getImm());
+ llvm_unreachable("unexpected immediate operand type");
}
} else if (Op.isFPImm()) {
// We special case 0.0 because otherwise it will be printed as an integer.
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 9d6a203426a..f2ed0e09bbf 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -88,6 +88,8 @@ private:
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index e60ead8acdc..4a046acfabb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -39,7 +39,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCRegisterInfo &MRI;
/// \brief Encode an fp or int literal
- uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize,
+ uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
const MCSubtargetInfo &STI) const;
public:
@@ -87,6 +87,42 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) {
return 0;
}
+static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == 0x3800) // 0.5
+ return 240;
+
+ if (Val == 0xB800) // -0.5
+ return 241;
+
+ if (Val == 0x3C00) // 1.0
+ return 242;
+
+ if (Val == 0xBC00) // -1.0
+ return 243;
+
+ if (Val == 0x4000) // 2.0
+ return 244;
+
+ if (Val == 0xC000) // -2.0
+ return 245;
+
+ if (Val == 0x4400) // 4.0
+ return 246;
+
+ if (Val == 0xC400) // -4.0
+ return 247;
+
+ if (Val == 0x3118 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ return 248;
+
+ return 255;
+}
+
static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
if (IntImm != 0)
@@ -160,7 +196,7 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
}
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
- unsigned OpSize,
+ const MCOperandInfo &OpInfo,
const MCSubtargetInfo &STI) const {
int64_t Imm;
@@ -180,12 +216,16 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
Imm = MO.getImm();
}
- if (OpSize == 4)
+ switch (AMDGPU::getOperandSize(OpInfo)) {
+ case 4:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
-
- assert(OpSize == 8);
-
- return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
+ case 8:
+ return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
+ case 2:
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ default:
+ llvm_unreachable("invalid operand size");
+ }
}
void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -212,12 +252,9 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (!AMDGPU::isSISrcOperand(Desc, i))
continue;
- int RCID = Desc.OpInfo[i].RegClass;
- const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
// Is this operand a literal immediate?
const MCOperand &Op = MI.getOperand(i);
- if (getLitEncoding(Op, AMDGPU::getRegBitWidth(RC) / 8, STI) != 255)
+ if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
continue;
// Yes! Encode it
@@ -282,9 +319,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
- uint32_t Enc = getLitEncoding(MO,
- AMDGPU::getRegOperandSize(&MRI, Desc, OpNo),
- STI);
+ uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
return Enc;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 6bb31a9a781..ff4e3214718 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -88,17 +88,36 @@ enum ClassFlags {
namespace AMDGPU {
enum OperandType {
/// Operands with register or 32-bit immediate
- OPERAND_REG_IMM32_INT = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_REG_IMM32_FP,
+ OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM_INT64,
+ OPERAND_REG_IMM_INT16,
+ OPERAND_REG_IMM_FP32,
+ OPERAND_REG_IMM_FP64,
+ OPERAND_REG_IMM_FP16,
+
/// Operands with register or inline constant
- OPERAND_REG_INLINE_C_INT,
- OPERAND_REG_INLINE_C_FP,
+ OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_INT32,
+ OPERAND_REG_INLINE_C_INT64,
+ OPERAND_REG_INLINE_C_FP16,
+ OPERAND_REG_INLINE_C_FP32,
+ OPERAND_REG_INLINE_C_FP64,
+
+ OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+
+ OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+
+ OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,
/// Operand with 32-bit immediate that uses the constant bus.
- OPERAND_KIMM32
+ OPERAND_KIMM32,
+ OPERAND_KIMM16
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3d59f8d82ae..831ac5948a6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -315,12 +315,14 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
return;
}
- APInt Imm(64, OpToFold.getImm());
const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
const TargetRegisterClass *FoldRC =
TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+ APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
+ OpToFold.getImm());
+
// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
unsigned UseReg = UseOp.getReg();
@@ -329,6 +331,8 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
MRI.getRegClass(UseReg) :
TRI.getPhysRegClass(UseReg);
+ assert(Imm.getBitWidth() == 64);
+
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
return;
@@ -505,7 +509,6 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (!isSafeToFold(MI))
continue;
- unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
@@ -559,14 +562,15 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
Use != E; ++Use) {
MachineInstr *UseMI = Use->getParent();
+ unsigned OpNo = Use.getOperandNo();
- if (TII->isInlineConstant(OpToFold, OpSize)) {
- foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
+ if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
+ foldOperand(OpToFold, UseMI, OpNo, FoldList,
CopiesToReplace, TII, TRI, MRI);
} else {
if (++NumLiteralUses == 1) {
NonInlineUse = &*Use;
- NonInlineUseOpNo = Use.getOperandNo();
+ NonInlineUseOpNo = OpNo;
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9071ded6567..981b63c59a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1415,10 +1415,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// If this is a free constant, there's no reason to do this.
// TODO: We could fold this here instead of letting SIFoldOperands do it
// later.
- if (isInlineConstant(ImmOp, 4))
+ MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
+
+ // Any src operand can be used for the legality check.
+ if (isInlineConstant(UseMI, *Src0, ImmOp))
return false;
- MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -1620,8 +1622,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
case AMDGPU::V_MAC_F16_e32:
IsF16 = true;
case AMDGPU::V_MAC_F32_e32: {
- const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Src0->isImm() && !isInlineConstant(*Src0, 4))
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src0);
+ const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
+ if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
return nullptr;
break;
}
@@ -1682,46 +1686,55 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
case 64:
return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
+ case 16:
+ return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
}
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- if (MO.isImm()) {
- // MachineOperand provides no way to tell the true operand size, since it
- // only records a 64-bit value. We need to know the size to determine if a
- // 32-bit floating point immediate bit pattern is legal for an integer
- // immediate. It would be for any 32-bit integer operand, but would not be
- // for a 64-bit one.
- switch (OpSize) {
- case 4:
- return AMDGPU::isInlinableLiteral32(static_cast<int32_t>(MO.getImm()),
- ST.hasInv2PiInlineImm());
- case 8:
- return AMDGPU::isInlinableLiteral64(MO.getImm(),
- ST.hasInv2PiInlineImm());
- default:
- llvm_unreachable("invalid bitwidth");
- }
- }
+ uint8_t OperandType) const {
+ if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET)
+ return false;
- return false;
-}
+ // MachineOperand provides no way to tell the true operand size, since it only
+ // records a 64-bit value. We need to know the size to determine if a 32-bit
+ // floating point immediate bit pattern is legal for an integer immediate. It
+ // would be for any 32-bit integer operand, but would not be for a 64-bit one.
-bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- return MO.isImm() && !isInlineConstant(MO, OpSize);
+ int64_t Imm = MO.getImm();
+ switch (operandBitWidth(OperandType)) {
+ case 32: {
+ int32_t Trunc = static_cast<int32_t>(Imm);
+ return Trunc == Imm &&
+ AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+ }
+ case 64: {
+ return AMDGPU::isInlinableLiteral64(MO.getImm(),
+ ST.hasInv2PiInlineImm());
+ }
+ case 16: {
+ if (isInt<16>(Imm) || isUInt<16>(Imm)) {
+ int16_t Trunc = static_cast<int16_t>(Imm);
+ return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ }
+
+ return false;
+ }
+ default:
+ llvm_unreachable("invalid bitwidth");
+ }
}
bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
- unsigned OpSize) const {
+ const MCOperandInfo &OpInfo) const {
switch (MO.getType()) {
case MachineOperand::MO_Register:
return false;
case MachineOperand::MO_Immediate:
- return !isInlineConstant(MO, OpSize);
+ return !isInlineConstant(MO, OpInfo);
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_ExternalSymbol:
@@ -1760,11 +1773,10 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
- if (isLiteralConstant(MO, OpSize))
- return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+ if (MO.isImm() && isInlineConstant(MO, OpInfo))
+ return RI.opCanUseInlineConstant(OpInfo.OperandType);
- return RI.opCanUseInlineConstant(OpInfo.OperandType);
+ return RI.opCanUseLiteralConstant(OpInfo.OperandType);
}
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
@@ -1791,12 +1803,17 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
- unsigned OpSize) const {
+ const MCOperandInfo &OpInfo) const {
// Literal constants use the constant bus.
- if (isLiteralConstant(MO, OpSize))
- return true;
+ //if (isLiteralConstantLike(MO, OpInfo))
+ // return true;
+ if (MO.isImm())
+ return !isInlineConstant(MO, OpInfo);
- if (!MO.isReg() || !MO.isUse())
+ if (!MO.isReg())
+ return true; // Misc other operands like FrameIndex
+
+ if (!MO.isUse())
return false;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -1925,17 +1942,22 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
break;
- case AMDGPU::OPERAND_REG_IMM32_INT:
- case AMDGPU::OPERAND_REG_IMM32_FP:
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
break;
- case AMDGPU::OPERAND_REG_INLINE_C_INT:
- case AMDGPU::OPERAND_REG_INLINE_C_FP:
- if (isLiteralConstant(MI.getOperand(i),
- RI.getRegClass(RegClass)->getSize())) {
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
break;
+ }
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
// Check if this operand is an immediate.
@@ -1987,7 +2009,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (OpIdx == -1)
break;
const MachineOperand &MO = MI.getOperand(OpIdx);
- if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
+ if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
@@ -2330,7 +2352,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);
- if (isVALU(MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
+ if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
RegSubRegPair SGPRUsed;
if (MO->isReg())
@@ -2342,7 +2364,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg()) {
if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
- usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+ usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
return false;
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
@@ -3539,14 +3561,14 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (Src0Idx == -1)
return 4; // No operands.
- if (isLiteralConstantLike(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx)))
+ if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
return 8;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return 4;
- if (isLiteralConstantLike(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx)))
+ if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
return 8;
return 4;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 0f16fa0902f..81d0ef42234 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -462,15 +462,96 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ static int operandBitWidth(uint8_t OperandType) {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return 32;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return 64;
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ return 16;
+ default:
+ llvm_unreachable("unexpected operand type");
+ }
+ }
+
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
- bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
+
+ bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
+
+ bool isInlineConstant(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const {
+ return isInlineConstant(MO, OpInfo.OperandType);
+ }
+
+ /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
+ /// be an inline immediate.
+ bool isInlineConstant(const MachineInstr &MI,
+ const MachineOperand &UseMO,
+ const MachineOperand &DefMO) const {
+ assert(UseMO.getParent() == &MI);
+ int OpIdx = MI.getOperandNo(&UseMO);
+ if (!MI.getDesc().OpInfo || OpIdx > MI.getDesc().NumOperands) {
+ return false;
+ }
+
+ return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
+ }
+
+ /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
+ /// immediate.
+ bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
+ }
+
+ bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
+ const MachineOperand &MO) const {
+ if (!MI.getDesc().OpInfo || OpIdx > MI.getDesc().NumOperands)
+ return false;
+
+ if (MI.isCopy()) {
+ unsigned Size = getOpSize(MI, OpIdx);
+ assert(Size == 8 || Size == 4);
+
+ uint8_t OpType = (Size == 8) ?
+ AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
+ return isInlineConstant(MO, OpType);
+ }
+
+ return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
+ }
+
+ bool isInlineConstant(const MachineOperand &MO) const {
+ const MachineInstr *Parent = MO.getParent();
+ return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
+ }
+
+ bool isLiteralConstant(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const {
+ return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
+ }
+
+ bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isImm() && !isInlineConstant(MI, OpIdx);
+ }
// Returns true if this operand could potentially require a 32-bit literal
// operand, but not necessarily. A FrameIndex for example could resolve to an
// inline immediate value that will not require an additional 4-bytes; this
// assumes that it will.
- bool isLiteralConstantLike(const MachineOperand &MO, unsigned OpSize) const;
+ bool isLiteralConstantLike(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const;
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MachineOperand &MO) const;
@@ -482,7 +563,7 @@ public:
/// \brief Returns true if this operand uses the constant bus.
bool usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
- unsigned OpSize) const;
+ const MCOperandInfo &OpInfo) const;
/// \brief Return true if this instruction has any modifiers.
/// e.g. src[012]_mod, omod, clamp.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index aeef7acdfef..9f7c921c565 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -445,22 +445,30 @@ def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
} // End OperandType = "OPERAND_IMMEDIATE"
-
-// 32-bit VALU immediate operand that uses the constant bus.
-def KImmFP32MatchClass : AsmOperandClass {
- let Name = "KImmFP32";
- let PredicateMethod = "isKImmFP32";
+class KImmMatchClass<int size> : AsmOperandClass {
+ let Name = "KImmFP"#size;
+ let PredicateMethod = "isKImmFP"#size;
let ParserMethod = "parseImm";
- let RenderMethod = "addKImmFP32Operands";
+ let RenderMethod = "addKImmFP"#size#"Operands";
}
-def f32kimm : Operand<i32> {
+class kimmOperand<ValueType vt> : Operand<vt> {
let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_KIMM32";
- let PrintMethod = "printU32ImmOperand";
- let ParserMatchClass = KImmFP32MatchClass;
+ let OperandType = "OPERAND_KIMM"#vt.Size;
+ let PrintMethod = "printU"#vt.Size#"ImmOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
}
+// 32-bit VALU immediate operand that uses the constant bus.
+def KImmFP32MatchClass : KImmMatchClass<32>;
+def f32kimm : kimmOperand<i32>;
+
+// 32-bit VALU immediate operand with a 16-bit value that uses the
+// constant bus.
+def KImmFP16MatchClass : KImmMatchClass<16>;
+def f16kimm : kimmOperand<i16>;
+
+
def VOPDstS64 : VOPDstOperand <SReg_64>;
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
@@ -468,6 +476,7 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -480,6 +489,8 @@ class InputMods <AsmOperandClass matchClass> : Operand <i32> {
class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndFPInputMods";
}
+
+def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
@@ -629,8 +640,8 @@ class getVOPSrc0ForVT<ValueType VT> {
!if(!eq(VT.Value, f64.Value), 1,
0)));
RegisterOperand ret = !if(isFP,
- !if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32),
- !if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32));
+ !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
+ !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
}
// Returns the vreg register class to use for source operand given VT
@@ -657,8 +668,9 @@ class getVOP3SrcForVT<ValueType VT> {
!if(!eq(VT.Value, i1.Value),
SCSrc_b64,
!if(isFP,
- VCSrc_f32,
- VCSrc_b32)
+ !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
+ !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
+ )
)
)
);
@@ -691,7 +703,13 @@ class getSrcMod <ValueType VT> {
0)));
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
- !if(isFP, FP32InputMods, Int32InputMods));
+ !if(isFP,
+ !if(!eq(VT.Value, f16.Value),
+ FP16InputMods,
+ FP32InputMods
+ ),
+ Int32InputMods)
+ );
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0d6166ac2af..83c4fc45cef 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -107,9 +107,8 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
-def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)> {
- let VALU = 1;
-}
+def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
+ (ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
let usesCustomInserter = 1, SALU = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 41633a2b6a0..bda0a44c984 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1085,19 +1085,6 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
- return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
- OpType == AMDGPU::OPERAND_REG_IMM32_FP;
-}
-
-bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
- if (opCanUseLiteralConstant(OpType))
- return true;
-
- return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
-}
-
// FIXME: Most of these are flexible with HSA and we don't need to reserve them
// as input registers if unused. Whether the dispatch ptr is necessary should be
// easy to detect from used intrinsics. Scratch setup is harder to know.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 672df79218b..0bcae7d9840 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -138,12 +139,19 @@ public:
/// \returns True if operands defined with this operand type can accept
/// a literal constant (i.e. any 32-bit immediate).
- bool opCanUseLiteralConstant(unsigned OpType) const;
+ bool opCanUseLiteralConstant(unsigned OpType) const {
+ // TODO: 64-bit operands have extending behavior from 32-bit literal.
+ return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
+ }
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
/// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
- bool opCanUseInlineConstant(unsigned OpType) const;
+ bool opCanUseInlineConstant(unsigned OpType) const {
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+ }
enum PreloadedValue {
// SGPRS:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 3bd3f882d04..0dd9fa5bb34 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -384,31 +384,43 @@ class RegImmMatcher<string name> : AsmOperandClass {
multiclass SIRegOperand <string rc, string MatchName, string opType> {
let OperandNamespace = "AMDGPU" in {
+ def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_INT16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
+ let DecoderMethod = "decodeOperand_VSrc16";
+ }
+
+ def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_FP16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+ let DecoderMethod = "decodeOperand_VSrc16";
+ }
def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
- let OperandType = opType#"_INT";
+ let OperandType = opType#"_INT32";
let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
}
def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
- let OperandType = opType#"_FP";
+ let OperandType = opType#"_FP32";
let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
}
def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
- let OperandType = opType#"_INT";
+ let OperandType = opType#"_INT64";
let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
}
def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
- let OperandType = opType#"_FP";
+ let OperandType = opType#"_FP64";
let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
}
}
}
+// FIXME: 64-bit sources can sometimes use 32-bit constants.
multiclass RegImmOperand <string rc, string MatchName>
- : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM32">;
+ : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
multiclass RegInlineOperand <string rc, string MatchName>
: SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 9ee2ededbb0..b27d7c69103 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -134,15 +134,14 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
// Only one literal constant is allowed per instruction, so if src0 is a
// literal constant then we can't do any folding.
- if (Src0.isImm() &&
- TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
+ if (TII->isLiteralConstant(MI, Src0Idx))
return;
// Try to fold Src0
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
@@ -184,11 +183,15 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
}
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
- return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+ return isInt<16>(Src.getImm()) &&
+ !TII->isInlineConstant(*Src.getParent(),
+ Src.getParent()->getOperandNo(&Src));
}
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
- return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+ return isUInt<16>(Src.getImm()) &&
+ !TII->isInlineConstant(*Src.getParent(),
+ Src.getParent()->getOperandNo(&Src));
}
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
@@ -196,12 +199,12 @@ static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
bool &IsUnsigned) {
if (isInt<16>(Src.getImm())) {
IsUnsigned = false;
- return !TII->isInlineConstant(Src, 4);
+ return !TII->isInlineConstant(Src);
}
if (isUInt<16>(Src.getImm())) {
IsUnsigned = true;
- return !TII->isInlineConstant(Src, 4);
+ return !TII->isInlineConstant(Src);
}
return false;
@@ -212,7 +215,7 @@ static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
static bool isReverseInlineImm(const SIInstrInfo *TII,
const MachineOperand &Src,
int32_t &ReverseImm) {
- if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src, 4))
+ if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
return false;
ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 29cac2fbf6d..85cbadf0a57 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -329,25 +329,29 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
- OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ return true;
+ default:
+ return false;
+ }
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+ return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}
// Avoid using MCRegisterClass::getSize, since that function will go away
@@ -413,6 +417,15 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
if (Literal >= -16 && Literal <= 64)
return true;
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
uint32_t Val = static_cast<uint32_t>(Literal);
return (Val == FloatToBits(0.0f)) ||
(Val == FloatToBits(1.0f)) ||
@@ -426,6 +439,23 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
(Val == 0x3e22f983 && HasInv2Pi);
}
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3C00 || // 1.0
+ Val == 0xBC00 || // -1.0
+ Val == 0x3800 || // 0.5
+ Val == 0xB800 || // -0.5
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4400 || // 4.0
+ Val == 0xC400 || // -4.0
+ Val == 0x3118; // 1/2pi
+}
} // End namespace AMDGPU
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3101b96c8eb..ea5fc366d20 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -13,6 +13,8 @@
#include "AMDKernelCodeT.h"
#include "llvm/IR/CallingConv.h"
+#include "SIDefines.h"
+
#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_OPERAND_ENUM
@@ -167,6 +169,37 @@ unsigned getRegBitWidth(const MCRegisterClass &RC);
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo);
+LLVM_READNONE
+inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return 4;
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return 8;
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ return 2;
+
+ default:
+ llvm_unreachable("unhandled operand type");
+ }
+}
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
+ return getOperandSize(Desc.OpInfo[OpNo]);
+}
+
/// \brief Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
@@ -174,6 +207,8 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 0e87f90b62b..37e31f57b24 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -134,7 +134,8 @@ multiclass VOP2eInst <string opName,
}
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
- field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
field string Asm32 = "$vdst, $src0, $src1, $imm";
field bit HasExt = 0;
}
@@ -143,7 +144,8 @@ def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
- field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
field string Asm32 = "$vdst, $src0, $imm, $src1";
field bit HasExt = 0;
}