summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2020-01-07 11:29:05 -0500
committer: Matt Arsenault <arsenm2@gmail.com> 2020-01-09 10:29:31 -0500
commit: 3952748ffdf017f83faddcb1240cb36cb4bb9c5b (patch)
tree: 020bf845b71ad0406ee1f6dad36f0d47e438fef1 /llvm
parent: 0274ed9dc75a0efb2b6130122226ee45f7e57dde (diff)
download: bcm5719-llvm-3952748ffdf017f83faddcb1240cb36cb4bb9c5b.tar.gz
          bcm5719-llvm-3952748ffdf017f83faddcb1240cb36cb4bb9c5b.zip
AMDGPU/GlobalISel: Fix add of neg inline constant pattern
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUGISel.td 3
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 6
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstructions.td 15
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir 113
5 files changed, 139 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index d7c211f1ed9..05836101431 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -204,3 +204,6 @@ foreach Ty = [i64, p0, p1, p4] in {
def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
GISDNodeXFormEquiv<as_i32timm>;
+
+def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
+ GISDNodeXFormEquiv<NegateImm>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a41c8f1a6a3..a632e7aece1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2097,6 +2097,12 @@ void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
MIB.addImm(CstVal.getValue());
}
+void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const {
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
+}
+
bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 45782ab3185..8d44c588864 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -169,6 +169,9 @@ private:
void renderTruncImm32(MachineInstrBuilder &MIB,
const MachineInstr &MI) const;
+ void renderNegateImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const;
+
bool isInlineImmediate16(int64_t Imm) const;
bool isInlineImmediate32(int64_t Imm) const;
bool isInlineImmediate64(int64_t Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index dcc139a9fe9..4c8197975ce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1930,9 +1930,22 @@ def : GCNPat <
// TODO: Also do for 64-bit.
def : GCNPat<
(add i32:$src0, (i32 NegSubInlineConst32:$src1)),
- (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+ (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1)
>;
+def : GCNPat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+ let SubtargetPredicate = HasAddNoCarryInsts;
+}
+
+def : GCNPat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+ let SubtargetPredicate = NotHasAddNoCarryInsts;
+}
+
+
// Avoid pointlessly materializing a constant in VGPR.
// FIXME: Should also do this for readlane, but tablegen crashes on
// the ignored src1.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
index 43931132107..b0f9cc52ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir
@@ -53,3 +53,116 @@ body: |
S_ENDPGM 0, implicit %9
...
+
+---
+name: add_neg_inline_const_64_to_sub_s32_s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s
+ ; GFX6: liveins: $sgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc
+ ; GFX6: S_ENDPGM 0, implicit [[S_SUB_I32_]]
+ ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc
+ ; GFX9: S_ENDPGM 0, implicit [[S_SUB_I32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_CONSTANT i32 -64
+ %2:sgpr(s32) = G_ADD %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: add_neg_inline_const_64_to_sub_s32_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: S_ENDPGM 0, implicit %2
+ ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec
+ ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 -64
+ %2:vgpr(s32) = G_ADD %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: add_neg_inline_const_16_to_sub_s32_s
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+
+ ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
+ ; GFX6: liveins: $sgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
+ ; GFX9: liveins: $sgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+ ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ %0:sgpr(s32) = COPY $sgpr0
+ %1:sgpr(s32) = G_CONSTANT i32 16
+ %2:sgpr(s32) = G_ADD %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: add_neg_inline_const_16_to_sub_s32_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: S_ENDPGM 0, implicit %2
+ ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 16
+ %2:vgpr(s32) = G_ADD %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
OpenPOWER on IntegriCloud