| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-03 13:42:56 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-03 13:42:56 +0000 |
| commit | 2c8936fd264239db1438358fc01c83412aa161ed (patch) | |
| tree | 73b1456d7a147dae1b8fa36c20f8dbc2484c4e72 | |
| parent | 2636460f0e1cb582f3793775efbffdc02bf55d23 (diff) | |
| download | bcm5719-llvm-2c8936fd264239db1438358fc01c83412aa161ed.tar.gz bcm5719-llvm-2c8936fd264239db1438358fc01c83412aa161ed.zip | |
AMDGPU: Fix incorrect commute with sub when folding immediates
When folding an immediate into a sub/subrev required shrinking the
instruction to its VOP2 form, the wrong VOP2 opcode was used: the VOP2
equivalent of the original instruction, rather than that of the commuted
instruction, which has the inverted opcode (sub vs. subrev).
llvm-svn: 359883
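To make the mix-up concrete, below is a minimal, self-contained C++ sketch. It is not the LLVM API: the enum values and the `getVOPe32`/`commuteOpcode` helpers here are simplified stand-ins for `AMDGPU::getVOPe32` and the commute performed inside `tryAddToFoldList`. It models the test case below where the SGPR immediate sits in src1 of a `V_SUB_I32_e64`; since the VOP2 encoding only accepts a constant/SGPR in src0, the fold commutes the operands, turning the sub into a subrev, so the 32-bit opcode must be looked up from the commuted instruction.

```cpp
#include <cassert>
#include <cstdio>

// Simplified stand-ins for the AMDGPU opcode enums used in the patch.
enum Opcode {
  V_SUB_I32_e64,
  V_SUBREV_I32_e64,
  V_SUB_I32_e32,
  V_SUBREV_I32_e32,
};

// Analogue of AMDGPU::getVOPe32: map a VOP3 (e64) opcode to its VOP2 (e32) form.
static Opcode getVOPe32(Opcode Opc) {
  switch (Opc) {
  case V_SUB_I32_e64:    return V_SUB_I32_e32;
  case V_SUBREV_I32_e64: return V_SUBREV_I32_e32;
  default:               return Opc; // already a 32-bit encoding
  }
}

// Commuting the operands of a sub turns it into a subrev, and vice versa.
static Opcode commuteOpcode(Opcode Opc) {
  switch (Opc) {
  case V_SUB_I32_e64:    return V_SUBREV_I32_e64;
  case V_SUBREV_I32_e64: return V_SUB_I32_e64;
  default:               return Opc;
  }
}

int main() {
  // The immediate is in src1, so folding it forces a commute before shrinking.
  Opcode Original = V_SUB_I32_e64;
  Opcode Commuted = commuteOpcode(Original);

  Opcode Buggy = getVOPe32(Original); // pre-fix: e32 form of the stale opcode (Opc)
  Opcode Fixed = getVOPe32(Commuted); // post-fix: e32 form of the commuted opcode

  assert(Buggy == V_SUB_I32_e32);     // wrong: operands were swapped but opcode was not inverted
  assert(Fixed == V_SUBREV_I32_e32);  // correct: matches the commuted operand order
  std::printf("buggy shrink picks opcode %d, correct shrink picks %d\n",
              Buggy, Fixed);
  return 0;
}
```

The patch's one-line change corresponds to the `Fixed` path above: it queries `MI->getOpcode()` after the commute has been applied instead of reusing the pre-commute `Opc`.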
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 5 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir | 16 |
2 files changed, 12 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 75d201245b8..8db2ee21603 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -372,7 +372,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,

       assert(MI->getOperand(1).isDef());

-      int Op32 = AMDGPU::getVOPe32(Opc);
+      // Make sure to get the 32-bit version of the commuted opcode.
+      unsigned MaybeCommutedOpc = MI->getOpcode();
+      int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
       FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, Op32));
       return true;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
index 1abd98c7a81..5e58210571d 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir
@@ -250,8 +250,8 @@ body: |
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]]
+    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec
@@ -269,8 +269,8 @@ body: |
     ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]]
+    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32_xm0 = S_MOV_B32 12345
     %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, 0, implicit $exec
@@ -288,8 +288,8 @@ body: |
     ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]]
+    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]]
     %0:sreg_32_xm0 = S_MOV_B32 12345
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec
@@ -307,8 +307,8 @@ body: |
     ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
     ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
     ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
-    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit [[V_SUBREV_I32_e32_]]
+    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_SUB_I32_e32_]]
     %0:vgpr_32 = IMPLICIT_DEF
     %1:sreg_32_xm0 = S_MOV_B32 12345
     %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, 0, implicit $exec

