diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 34 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir | 86 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir | 22 | 
6 files changed, 155 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index a5406ae11c7..fd1b29384ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -18,6 +18,11 @@ def gi_vsrc0 :      GIComplexOperandMatcher<s32, "selectVSRC0">,      GIComplexPatternEquiv<sd_vsrc0>; +def sd_vcsrc : ComplexPattern<i32, 1, "">; +def gi_vcsrc : +    GIComplexOperandMatcher<s32, "selectVCSRC">, +    GIComplexPatternEquiv<sd_vcsrc>; +  def gi_vop3mods0 :      GIComplexOperandMatcher<s32, "selectVOP3Mods0">,      GIComplexPatternEquiv<VOP3Mods0>; @@ -60,6 +65,26 @@ class GISelVop2CommutePat <    (inst src0_vt:$src0, src1_vt:$src1)  >; +class GISelVop3Pat2 < +  SDPatternOperator node, +  Instruction inst, +  ValueType dst_vt, +  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat < + +  (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))), +  (inst src0_vt:$src0, src1_vt:$src1) +>; + +class GISelVop3Pat2CommutePat < +  SDPatternOperator node, +  Instruction inst, +  ValueType dst_vt, +  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat < + +  (dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))), +  (inst src0_vt:$src1, src1_vt:$src0) +>; +  multiclass GISelVop2IntrPat <    SDPatternOperator node, Instruction inst,    ValueType dst_vt, ValueType src_vt = dst_vt> { @@ -76,6 +101,15 @@ multiclass GISelVop2IntrPat <  def : GISelSop2Pat <or, S_OR_B32, i32>;  def : GISelVop2Pat <or, V_OR_B32_e32, i32>; +def : GISelSop2Pat <sra, S_ASHR_I32, i32>; +let AddedComplexity = 100 in { +let SubtargetPredicate = isSICI in { +def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>; +} +def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>; +} +def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>; +  // FIXME: Select directly to _e32 so we don't need to deal with modifiers.  
// FIXME: We can't re-use SelectionDAG patterns here because they match  // against a custom SDNode and we would need to create a generic machine diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f98f57ea047..80a1bc9fe17 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -537,6 +537,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,    switch (I.getOpcode()) {    default:      break; +  case TargetOpcode::G_ASHR:    case TargetOpcode::G_SITOFP:    case TargetOpcode::G_FMUL:    case TargetOpcode::G_FADD: @@ -564,6 +565,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,    return false;  } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { +  return {{ +      [=](MachineInstrBuilder &MIB) { MIB.add(Root); } +  }}; + +} +  ///  /// This will select either an SGPR or VGPR operand and will save us from  /// having to write an extra tablegen pattern. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 8283ab51741..fee10417531 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -73,6 +73,9 @@ private:    bool selectG_STORE(MachineInstr &I) const;    InstructionSelector::ComplexRendererFns +  selectVCSRC(MachineOperand &Root) const; + +  InstructionSelector::ComplexRendererFns    selectVSRC0(MachineOperand &Root) const;    InstructionSelector::ComplexRendererFns diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c4298331cf2..ecfa2011c4e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -55,6 +55,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,    };    setAction({G_ADD, S32}, Legal); +  setAction({G_ASHR, S32}, Legal);    setAction({G_SUB, S32}, Legal);    setAction({G_MUL, S32}, Legal);    setAction({G_AND, S32}, Legal); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir new file mode 100644 index 00000000000..86692c3e1d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -0,0 +1,86 @@ +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI + +--- | +  define void @ashr(i32 addrspace(1)* %global0) {ret void} +... 
+--- + +name:            ashr +legalized:       true +regBankSelected: true + +# GCN-LABEL: name: ashr +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 +    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 +    ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 +    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0 +    %0:sgpr(s32) = COPY $sgpr0 +    %1:sgpr(s32) = COPY $sgpr1 +    %2:vgpr(s32) = COPY $vgpr0 +    %3:vgpr(s64) = COPY $vgpr3_vgpr4 + +    ; GCN: [[C1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 +    ; GCN: [[C4096:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 +    %4:sgpr(s32) = G_CONSTANT i32 1 +    %5:sgpr(s32) = G_CONSTANT i32 4096 + +    ; ashr ss +    ; GCN: [[SS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SGPR0]], [[SGPR1]] +    %6:sgpr(s32) = G_ASHR %0, %1 + +    ; ashr si +    ; GCN: [[SI:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SS]], [[C1]] +    %7:sgpr(s32) = G_ASHR %6, %4 + +    ; ashr is +    ; GCN: [[IS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[C1]], [[SI]] +    %8:sgpr(s32) = G_ASHR %4, %7 + +    ; ashr sc +    ; GCN: [[SC:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[IS]], [[C4096]] +    %9:sgpr(s32) = G_ASHR %8, %5 + +    ; ashr cs +    ; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[C4096]], [[SC]] +    %10:sgpr(s32) = G_ASHR %5, %9 + +    ; ashr vs +    ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[CS]], [[VGPR0]] +    %11:vgpr(s32) = G_ASHR %2, %10 + +    ; ashr sv +    ; SI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[CS]], [[VS]] +    ; VI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VS]], [[CS]] +    %12:vgpr(s32) = G_ASHR %10, %11 + +    ; ashr vv +    ; SI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[SV]], [[VGPR0]] +    ; VI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[VGPR0]], [[SV]] +    %13:vgpr(s32) = G_ASHR %12, %2 + +    ; ashr iv +    ; SI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C1]], [[VV]] +    ; VI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VV]], [[C1]] +    %14:vgpr(s32) = G_ASHR %4, %13 + +    ; ashr vi +    ; GCN: [[VI:%[0-9]+]]:vgpr_32 = 
V_ASHRREV_I32_e32 [[C1]], [[IV]] +    %15:vgpr(s32) = G_ASHR %14, %4 + +    ; ashr cv +    ; SI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C4096]], [[VI]] +    ; VI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VI]], [[C4096]] +    %16:vgpr(s32) = G_ASHR %5, %15 + +    ; ashr vc +    ; GCN: [[VC:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C4096]], [[CV]] +    %17:vgpr(s32) = G_ASHR %16, %5 + + +    G_STORE %17, %3 :: (store 4 into %ir.global0) + +... +--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir new file mode 100644 index 00000000000..71a9de8e6bf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -0,0 +1,22 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- +name:            test_ashr +registers: +  - { id: 0, class: _ } +  - { id: 1, class: _ } +  - { id: 2, class: _ } +body: | +  bb.0.entry: +    liveins: $vgpr0, $vgpr1 + +    ; CHECK-LABEL: name: test_ashr +    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]] +    %0(s32) = COPY $vgpr0 +    %1(s32) = COPY $vgpr1 +    %2(s32) = G_ASHR %0, %1 +    $vgpr0 = COPY %2 +...  | 

