summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td | 8
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 24
-rw-r--r-- llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 6
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6
-rw-r--r-- llvm/test/CodeGen/AMDGPU/imm.ll | 60
-rw-r--r-- llvm/test/MC/AMDGPU/literals.s | 20
9 files changed, 113 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 90796f22b19..18098e77caa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -153,6 +153,12 @@ def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
"Has s_memrealtime instruction"
>;
+def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
+ "HasInv2PiInlineImm",
+ "true",
+ "Has 1 / (2 * pi) as inline immediate"
+>;
+
def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has16BitInsts",
"true",
@@ -318,7 +324,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
- FeatureScalarStores
+ FeatureScalarStores, FeatureInv2PiInlineImm
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 2ba18dd634e..dc3c64d0174 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -110,6 +110,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
Has16BitInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
+ HasScalarStores(false),
+ HasInv2PiInlineImm(false),
FlatAddressSpace(false),
R600ALUInst(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 416772199da..60142fe1005 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -106,6 +106,7 @@ protected:
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
+ bool HasInv2PiInlineImm;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
@@ -532,6 +533,10 @@ public:
return HasScalarStores;
}
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index be9e74bfcc6..aec10087a88 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -316,7 +316,9 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo, STI, O);
}
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -341,11 +343,16 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == FloatToBits(-4.0f))
O << "-4.0";
+ else if (Imm == 0x3e22f983 &&
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ O << "1/2pi";
else
O << formatHex(static_cast<uint64_t>(Imm));
}
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int64_t SImm = static_cast<int64_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -370,6 +377,9 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == DoubleToBits(-4.0))
O << "-4.0";
+ else if (Imm == 0x3fc45f306dc9c882 &&
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ O << "1/2pi";
else {
assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
@@ -405,13 +415,13 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (RCID != -1) {
unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
if (RCBits == 32)
- printImmediate32(Op.getImm(), O);
+ printImmediate32(Op.getImm(), STI, O);
else if (RCBits == 64)
- printImmediate64(Op.getImm(), O);
+ printImmediate64(Op.getImm(), STI, O);
else
llvm_unreachable("Invalid register class size");
} else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
- printImmediate32(Op.getImm(), O);
+ printImmediate32(Op.getImm(), STI, O);
} else {
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
@@ -427,9 +437,9 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
int RCID = Desc.OpInfo[OpNo].RegClass;
unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
if (RCBits == 32)
- printImmediate32(FloatToBits(Op.getFPImm()), O);
+ printImmediate32(FloatToBits(Op.getFPImm()), STI, O);
else if (RCBits == 64)
- printImmediate64(DoubleToBits(Op.getFPImm()), O);
+ printImmediate64(DoubleToBits(Op.getFPImm()), STI, O);
else
llvm_unreachable("Invalid register class size");
}
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 2c54e483271..8a5ce607428 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -81,8 +81,10 @@ private:
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printImmediate32(uint32_t Imm, raw_ostream &O);
- void printImmediate64(uint64_t Imm, raw_ostream &O);
+ void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 5b128f0e232..cd16fe0d0db 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -117,7 +117,8 @@ static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
if (Val == FloatToBits(-4.0f))
return 247;
- if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi)
+ if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
return 248;
return 255;
@@ -152,7 +153,8 @@ static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
if (Val == DoubleToBits(-4.0))
return 247;
- if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi)
+ if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
return 248;
return 255;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d8c98e6897b..cdd98c6ae71 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1725,7 +1725,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
(DoubleToBits(2.0) == Val) ||
(DoubleToBits(-2.0) == Val) ||
(DoubleToBits(4.0) == Val) ||
- (DoubleToBits(-4.0) == Val);
+ (DoubleToBits(-4.0) == Val) ||
+ (ST.hasInv2PiInlineImm() && Val == 0x3fc45f306dc9c882);
}
// The actual type of the operand does not seem to matter as long
@@ -1746,7 +1747,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
(FloatToBits(2.0f) == Val) ||
(FloatToBits(-2.0f) == Val) ||
(FloatToBits(4.0f) == Val) ||
- (FloatToBits(-4.0f) == Val);
+ (FloatToBits(-4.0f) == Val) ||
+ (ST.hasInv2PiInlineImm() && Val == 0x3e22f983);
}
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll
index 92a14685459..f8e4be44db4 100644
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -118,6 +118,24 @@ define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
ret void
}
+
+; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 1/2pi{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
+ store float 0x3FC45F3060000000, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
+; GCN: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
+ store float 0xBFC45F3060000000, float addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}store_literal_imm_f32:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
; GCN: buffer_store_dword [[REG]]
@@ -418,6 +436,30 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
ret void
}
+; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
+; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1/2pi
+; VI: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0x3fc45f306dc9c882
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_m_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @add_m_inv_2pi_f64(double addrspace(1)* %out, double %x) {
+ %y = fadd double %x, 0xbfc45f306dc9c882
+ store double %y, double addrspace(1)* %out
+ ret void
+}
; GCN-LABEL: {{^}}add_inline_imm_1_f64:
; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
@@ -599,6 +641,24 @@ define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
ret void
}
+; GCN-LABEL: {{^}}store_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inv_2pi_f64(double addrspace(1)* %out) {
+ store double 0x3fc45f306dc9c882, double addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64:
+; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
+; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
+ store double 0xbfc45f306dc9c882, double addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}store_literal_imm_f64:
; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index a552e6751ba..3367f713555 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -429,11 +429,11 @@ v_and_b32_e32 v0, 0xffffffffffffffff, v1
v_trunc_f32_e32 v0, 0x3fc45f306dc9c882
// NOSICI: error: invalid operand for instruction
-// VI: v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e]
+// VI: v_fract_f64_e32 v[0:1], 1/2pi ; encoding: [0xf8,0x64,0x00,0x7e]
v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882
// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e]
+// VI: v_trunc_f32_e32 v0, 1/2pi ; encoding: [0xf8,0x38,0x00,0x7e]
v_trunc_f32_e32 v0, 0x3e22f983
// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e]
@@ -445,11 +445,11 @@ v_fract_f64_e32 v[0:1], 0x3e22f983
v_trunc_f32_e64 v0, 0x3fc45f306dc9c882
// NOSICI: error: invalid operand for instruction
-// VI: v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
+// VI: v_fract_f64_e64 v[0:1], 1/2pi ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00]
v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882
// NOSICI: error: invalid operand for instruction
-// VI: v_trunc_f32_e64 v0, 0x3e22f983 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
+// VI: v_trunc_f32_e64 v0, 1/2pi ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00]
v_trunc_f32_e64 v0, 0x3e22f983
// NOSICI: error: invalid operand for instruction
@@ -457,21 +457,21 @@ v_trunc_f32_e64 v0, 0x3e22f983
v_fract_f64_e64 v[0:1], 0x3e22f983
// NOSICI: error: invalid operand for instruction
-// VI: s_mov_b64 s[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x01,0x80,0xbe]
+// VI: s_mov_b64 s[0:1], 1/2pi ; encoding: [0xf8,0x01,0x80,0xbe]
s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335
// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e]
-// VI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xf8,0x02,0x00,0x26]
+// VI: v_and_b32_e32 v0, 1/2pi, v1 ; encoding: [0xf8,0x02,0x00,0x26]
v_and_b32_e32 v0, 0.159154943091895317852646485335, v1
// NOSICI: error: invalid operand for instruction
-// VI: v_and_b32_e64 v0, 0x3e22f983, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
+// VI: v_and_b32_e64 v0, 1/2pi, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00]
v_and_b32_e64 v0, 0.159154943091895317852646485335, v1
// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f]
-// VI: v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 ; encoding: [0xf8,0x64,0x00,0x7e]
+// VI: v_fract_f64_e32 v[0:1], 1/2pi ; encoding: [0xf8,0x64,0x00,0x7e]
v_fract_f64 v[0:1], 0.159154943091895317852646485335
// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e]
-// VI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xf8,0x38,0x00,0x7e]
-v_trunc_f32 v0, 0.159154943091895317852646485335
\ No newline at end of file
+// VI: v_trunc_f32_e32 v0, 1/2pi ; encoding: [0xf8,0x38,0x00,0x7e]
+v_trunc_f32 v0, 0.159154943091895317852646485335
OpenPOWER on IntegriCloud