summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 27
-rw-r--r-- llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir | 24
-rw-r--r-- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir | 29
-rw-r--r-- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll | 40
6 files changed, 110 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7414519aee1..672e49184a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1144,5 +1144,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->ArgInfo.WorkItemIDZ)))
return true;
+ MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
+ MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index e70a51bfd3d..a3f6caaacc8 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -418,7 +418,8 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
- ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)) {}
+ ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
+ Mode(MFI.getMode()) {}
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 2cbca8930a6..a8928dacf77 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -232,6 +232,31 @@ template <> struct MappingTraits<SIArgumentInfo> {
}
};
+/// Serializable pair of floating-point mode flags for a machine function.
+///
+/// A default-constructed SIMode has both flags enabled, which is the mode
+/// used for the default calling convention.
+struct SIMode {
+  bool IEEE = true;
+  bool DX10Clamp = true;
+
+  SIMode() = default;
+
+  /// Take the IEEE and DX10Clamp settings from the in-memory mode defaults.
+  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode)
+      : IEEE(Mode.IEEE), DX10Clamp(Mode.DX10Clamp) {}
+
+  /// Two modes are equal only when both flags agree.
+  bool operator ==(const SIMode Other) const {
+    if (IEEE != Other.IEEE)
+      return false;
+    return DX10Clamp == Other.DX10Clamp;
+  }
+};
+
+/// YAML mapping for SIMode: "ieee" and "dx10-clamp" are both optional keys
+/// whose default value is true.
+template <> struct MappingTraits<SIMode> {
+  static void mapping(IO &YamlIO, SIMode &Mode) {
+    // Shared default handed to mapOptional for each flag.
+    const bool EnabledByDefault = true;
+    YamlIO.mapOptional("ieee", Mode.IEEE, EnabledByDefault);
+    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, EnabledByDefault);
+  }
+};
+
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
@@ -247,6 +272,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
StringValue StackPtrOffsetReg = "$sp_reg";
Optional<SIArgumentInfo> ArgInfo;
+ SIMode Mode;
SIMachineFunctionInfo() = default;
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
@@ -275,6 +301,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
StringValue("$sp_reg"));
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+ YamlIO.mapOptional("mode", MFI.Mode, SIMode());
}
};
diff --git a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir
index b53813a3d91..c2bf002a92d 100644
--- a/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir
+++ b/llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir
@@ -1,20 +1,5 @@
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s
---- |
- define amdgpu_ps void @omod_inst_flag_nsz_src() {
- unreachable
- }
-
- define amdgpu_ps void @omod_inst_flag_nsz_result() {
- unreachable
- }
-
- define amdgpu_ps void @omod_inst_flag_nsz_both() {
- unreachable
- }
-
-...
-
---
# FIXME: Is it OK to fold omod for this?
@@ -24,6 +9,9 @@
# GCN-NEXT: S_ENDPGM 0, implicit %1
name: omod_inst_flag_nsz_src
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
@@ -42,6 +30,9 @@ body: |
name: omod_inst_flag_nsz_result
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
@@ -60,6 +51,9 @@ body: |
name: omod_inst_flag_nsz_both
tracksRegLiveness: true
+machineFunctionInfo:
+ mode:
+ ieee: false
body: |
bb.0:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 73d0855f612..4523af65645 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -22,6 +22,9 @@
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -85,6 +88,9 @@ body: |
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -117,6 +123,9 @@ body: |
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -150,6 +159,9 @@ body: |
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+# FULL-NEXT: mode:
+# FULL-NEXT: ieee: true
+# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -214,3 +226,20 @@ body: |
S_ENDPGM 0
...
+
+---
+# ALL-LABEL: name: parse_mode
+# ALL: mode:
+# ALL-NEXT: ieee: false
+# ALL-NEXT: dx10-clamp: false
+name: parse_mode
+machineFunctionInfo:
+ mode:
+ ieee: false
+ dx10-clamp: false
+
+body: |
+ bb.0:
+ S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index 0fdbce5208d..79d3d82cc84 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -25,6 +25,9 @@
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr6' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr0' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
@@ -48,6 +51,9 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr3' }
; CHECK-NEXT: implicitBufferPtr: { reg: '$sgpr0_sgpr1' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
ret void
@@ -69,6 +75,9 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define void @function() {
ret void
@@ -90,9 +99,40 @@ define void @function() {
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
; CHECK-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+; CHECK-NEXT: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: true
; CHECK-NEXT: body:
define void @function_nsz() #0 {
ret void
}
+; CHECK-LABEL: {{^}}name: function_dx10_clamp_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: true
+; CHECK-NEXT: dx10-clamp: false
+define void @function_dx10_clamp_off() #1 {
+ ret void
+}
+
+; CHECK-LABEL: {{^}}name: function_ieee_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: true
+define void @function_ieee_off() #2 {
+ ret void
+}
+
+; CHECK-LABEL: {{^}}name: function_ieee_off_dx10_clamp_off
+; CHECK: mode:
+; CHECK-NEXT: ieee: false
+; CHECK-NEXT: dx10-clamp: false
+define void @function_ieee_off_dx10_clamp_off() #3 {
+ ret void
+}
+
attributes #0 = { "no-signed-zeros-fp-math" = "true" }
+
+attributes #1 = { "amdgpu-dx10-clamp" = "false" }
+attributes #2 = { "amdgpu-ieee" = "false" }
+attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }
OpenPOWER on IntegriCloud