summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td35
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td14
5 files changed, 69 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index f1d899c4d00..79d47f59551 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -79,6 +79,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
"Have scratch_* flat memory instructions"
>;
+def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", // feature name as spelled in a feature string
+ "AddNoCarryInsts", // subtarget field this feature sets (the AddNoCarryInsts bool in AMDGPUSubtarget)
+ "true", // value assigned to that field when the feature is enabled
+ "Have VALU add/sub instructions without carry out"
+>;
+
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"UnalignedBufferAccess",
"true",
@@ -464,7 +470,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
- FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
+ FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+ FeatureAddNoCarryInsts
]
>;
@@ -681,6 +688,12 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
AssemblerPredicate<"FeatureFlatGlobalInsts">;
+def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">, // must match the accessor name declared in AMDGPUSubtarget.h (hasAddNoCarry)
+ AssemblerPredicate<"FeatureAddNoCarryInsts">;
+
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">, // negation of HasAddNoCarryInsts, for subtargets lacking the feature
+ AssemblerPredicate<"!FeatureAddNoCarryInsts">;
+
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<"Feature16BitInsts">;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 77961762901..f4484b9c653 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -167,6 +167,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
+ AddNoCarryInsts(false),
R600ALUInst(false),
CaymanISA(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index d4b6a5fe802..389fdc9d636 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -159,6 +159,7 @@ protected:
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
+ bool AddNoCarryInsts;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
@@ -419,6 +420,10 @@ public:
return FlatScratchInsts;
}
+ bool hasAddNoCarry() const { // reports the AddNoCarryInsts subtarget flag
+ return AddNoCarryInsts;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ba69e42d912..0c34b8d2260 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1300,8 +1300,43 @@ def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
// Assembler aliases
//============================================================================//
+multiclass NoCarryAlias<string Inst, // Inst: mnemonic text that every alias below matches
+ Instruction Inst32NC, Instruction Inst64NC, // targets for the forms written without a carry-out operand
+ Instruction Inst32CO, Instruction Inst64CO> { // targets for the forms written with a carry-out operand (vcc or $sdst)
+ def : InstAlias<Inst#" $vdst, $src0, $src1", // "<Inst> vdst, src0, src1" -> Inst32NC
+ (Inst32NC VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>, // NOTE(review): 1000 / -10 look like InstAlias emit priorities - confirm against Target.td
+ Requires<[HasAddNoCarryInsts]>;
+
+ def : InstAlias<Inst#" $vdst, $src0, $src1", // same syntax as above, mapped to Inst64NC with VCSrc_b32 source operands
+ (Inst64NC VGPR_32:$vdst, VCSrc_b32:$src0, VCSrc_b32:$src1), -10>,
+ Requires<[HasAddNoCarryInsts]>;
+
+ def : InstAlias<Inst#" $vdst, vcc, $src0, $src1", // literal "vcc" written as the second operand -> Inst32CO
+ (Inst32CO VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
+ Requires<[HasAddNoCarryInsts]>;
+
+ def : InstAlias<Inst#" $vdst, $sdst, $src0, $src1", // explicit SReg_64 $sdst operand -> Inst64CO
+ (Inst64CO VGPR_32:$vdst, SReg_64:$sdst, VSrc_b32:$src0, VGPR_32:$src1), -10>,
+ Requires<[HasAddNoCarryInsts]>;
+}
+
+// gfx9 made a mess of add instruction names. The existing add
+// instructions had _co added to their names, and their old names were
+// repurposed for a version without carry out.
+let Predicates = [HasAddNoCarryInsts] in { // aliases apply only when the add-no-carry feature is present
+defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi, // _U32 records fill the Inst32NC/Inst64NC parameters
+ V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>; // _I32 records fill the Inst32CO/Inst64CO parameters
+defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi,
+ V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>;
+defm : NoCarryAlias<"v_subrev_u32",
+ V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi,
+ V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>;
+}
+
+let Predicates = [NotHasAddNoCarryInsts] in { // without the no-carry instructions, the _u32 spellings remain mnemonic aliases of _i32
def : MnemonicAlias<"v_add_u32", "v_add_i32">;
def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
+}
} // End isGCN predicate
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d5acb49b4f3..9f3df2b1d43 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -375,6 +375,14 @@ defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag,
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
+
+
+let SubtargetPredicate = HasAddNoCarryInsts in { // gate these definitions on the add-no-carry feature
+defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32>;
+defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32>;
+defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32">; // NOTE(review): last argument names v_sub_u32, presumably the operand-reversed counterpart - confirm against VOP2Inst
+}
+
} // End isCommutable = 1
// These are special and do not read the exec mask.
@@ -833,3 +841,9 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
} // End SubtargetPredicate = isVI
+
+let SubtargetPredicate = HasAddNoCarryInsts in { // same feature gate as the V_*_U32 VOP2Inst definitions
+defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>; // NOTE(review): 0x34-0x36 are presumably the VOP2 opcode values - confirm against the ISA manual
+defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>;
+defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>;
+}
OpenPOWER on IntegriCloud