diff options
Diffstat (limited to 'llvm')
4 files changed, 174 insertions, 2 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4ac8d6ab8be..b6ab610bdd2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -395,6 +395,7 @@ class AMDGPULDSF32Intrin<string clang_builtin> :      [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]  >; +// FIXME: The m0 argument should be moved after the normal arguments  class AMDGPUDSOrderedIntrinsic : Intrinsic<    [llvm_i32_ty],    // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c20b7976a4b..e0f2ccb63fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -222,6 +222,20 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(      const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };      return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));    } +  case Intrinsic::amdgcn_ds_ordered_add: +  case Intrinsic::amdgcn_ds_ordered_swap: { +    // VGPR = M0, VGPR +    static const OpRegBankEntry<3> Table[2] = { +      // Perfectly legal. +      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID  }, 1 }, + +      // Need a readfirstlane for m0 +      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 } +    }; + +    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } }; +    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); +  }    default:      return RegisterBankInfo::getInstrAlternativeMappings(MI);    } @@ -1042,6 +1056,14 @@ void AMDGPURegisterBankInfo::applyMappingImpl(        executeInWaterfallLoop(MI, MRI, { 2 });        return;      } +    case Intrinsic::amdgcn_ds_ordered_add: +    case Intrinsic::amdgcn_ds_ordered_swap: { +      // This is only allowed to execute with 1 lane, so readfirstlane is safe. +      assert(empty(OpdMapper.getVRegs(0))); +      substituteSimpleCopyRegs(OpdMapper, 3); +      constrainOpWithReadfirstlane(MI, MRI, 2); // M0 +      return; +    }      default:        break;      } @@ -1741,8 +1763,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {      case Intrinsic::amdgcn_atomic_dec:        return getDefaultMappingAllVGPR(MI);      case Intrinsic::amdgcn_ds_ordered_add: -    case Intrinsic::amdgcn_ds_ordered_swap: -      return getInvalidInstructionMapping(); +    case Intrinsic::amdgcn_ds_ordered_swap: { +      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); +      unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, +                                 AMDGPU::SGPRRegBankID); +      OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32); +      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); +      break; +    }      case Intrinsic::amdgcn_exp_compr:        OpdsMapping[0] = nullptr; // IntrinsicID        // FIXME: These are immediate values which can't be read from registers. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir new file mode 100644 index 00000000000..3d5c515fcb8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.add.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ds_ordered_add_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: ds_ordered_add_ss +    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_add_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr0 +    ; CHECK-LABEL: name: ds_ordered_add_vs +    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr0 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_add_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: ds_ordered_add_vv +    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_add_sv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr0 +    ; CHECK-LABEL: name: ds_ordered_add_sv +    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr0 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir new file mode 100644 index 00000000000..afb64cc1182 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.ordered.swap.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: ds_ordered_swap_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: ds_ordered_swap_ss +    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_swap_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr0 +    ; CHECK-LABEL: name: ds_ordered_swap_vs +    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr0 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_swap_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: ds_ordered_swap_vv +    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 +... + +--- +name: ds_ordered_swap_sv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr0 +    ; CHECK-LABEL: name: ds_ordered_swap_sv +    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0 +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr0 +    %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0 +...  | 

