diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 50 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir | 10 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir | 31 | ||||
| -rw-r--r-- | llvm/test/TableGen/GlobalISelEmitterRegSequence.td | 62 | ||||
| -rw-r--r-- | llvm/utils/TableGen/GlobalISelEmitter.cpp | 74 | 
5 files changed, 191 insertions, 36 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index bc503e7481d..e3dd0c22d33 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1136,18 +1136,28 @@ def : GCNPat <    (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit  >; -} // End let AddedComplexity = 1 +// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled + // def : GCNPat < +//   (fneg (f64 SReg_64:$src)), +//   (REG_SEQUENCE SReg_64, +//     (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +//     sub0, +//     (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +//                (i32 (S_MOV_B32 (i32 0x80000000)))), +//     sub1) +// >; + +// def : GCNPat < +//   (fneg (fabs (f64 SReg_64:$src))), +//   (REG_SEQUENCE SReg_64, +//     (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), +//     sub0, +//     (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), +//               (S_MOV_B32 (i32 0x80000000))), // Set sign bit. +//     sub1) +// >; -// FIXME: Should use S_OR_B32 -def : GCNPat < -  (fneg (fabs f64:$src)), -  (REG_SEQUENCE VReg_64, -    (i32 (EXTRACT_SUBREG f64:$src, sub0)), -    sub0, -    (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), -                  (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. -    sub1) ->; +} // End let AddedComplexity = 1  def : GCNPat <    (fabs (f32 VGPR_32:$src)), @@ -1189,16 +1199,28 @@ def : GCNPat <       sub1)  >; +// TODO: Use SGPR for constant  def : GCNPat < -  (fneg f64:$src), +  (fneg (f64 VReg_64:$src)),    (REG_SEQUENCE VReg_64, -    (i32 (EXTRACT_SUBREG f64:$src, sub0)), +    (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),      sub0, -    (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)), +    (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),                     (i32 (V_MOV_B32_e32 (i32 0x80000000)))),      sub1)  >; +// TODO: Use SGPR for constant +def : GCNPat < +  (fneg (fabs (f64 VReg_64:$src))), +  (REG_SEQUENCE VReg_64, +    (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), +    sub0, +    (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), +                  (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. +    sub1) +>; +  def : GCNPat <    (fcopysign f16:$src0, f16:$src1),    (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir index e1dd3293f4c..037b6bf2ad4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir @@ -219,9 +219,13 @@ body: |      liveins: $vgpr0_vgpr1      ; GCN-LABEL: name: fabs_s64_vv      ; GCN: liveins: $vgpr0_vgpr1 -    ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 -    ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] -    ; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64) +    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec +    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1 +    ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]      %0:vgpr(s64) = COPY $vgpr0_vgpr1      %1:vgpr(s64) = G_FABS %0      $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir index 29e3c2ea87e..ed0109e1f5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir @@ -220,9 +220,13 @@ body: |      liveins: $vgpr0_vgpr1      ; GCN-LABEL: name: fneg_s64_vv      ; GCN: liveins: $vgpr0_vgpr1 -    ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 -    ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]] -    ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) +    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec +    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1 +    ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]      %0:vgpr(s64) = COPY $vgpr0_vgpr1      %1:vgpr(s64) = G_FNEG %0      $vgpr0_vgpr1 = COPY %1 @@ -481,10 +485,13 @@ body: |      liveins: $vgpr0_vgpr1      ; GCN-LABEL: name: fneg_fabs_s64_vv      ; GCN: liveins: $vgpr0_vgpr1 -    ; GCN: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 -    ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] -    ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] -    ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) +    ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 +    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483648, implicit $exec +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 +    ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec +    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 +    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1 +    ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]      %0:vgpr(s64) = COPY $vgpr0_vgpr1      %1:vgpr(s64) = G_FABS %0      %2:vgpr(s64) = G_FNEG %1 @@ -503,9 +510,13 @@ body: |      ; GCN-LABEL: name: fneg_fabs_s64_vs      ; GCN: liveins: $sgpr0_sgpr1      ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 -    ; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]] -    ; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[FABS]] -    ; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64) +    ; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]] +    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_MOV_B32_e32 2147483648, implicit $exec +    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub1(s64) +    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec +    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64) +    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1 +    ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]](s64)      %0:sgpr(s64) = COPY $sgpr0_sgpr1      %1:vgpr(s64) = G_FABS %0      %2:vgpr(s64) = G_FNEG %1 diff --git a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td new file mode 100644 index 00000000000..64d338c7515 --- /dev/null +++ b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td @@ -0,0 +1,62 @@ +// RUN: llvm-tblgen %s -gen-global-isel -optimize-match-table=false -I %p/../../include -I %p/Common -o - | FileCheck %s + +include "llvm/Target/Target.td" +include "GlobalISelEmitterCommon.td" + +// Boilerplate code for setting up some registers with subregs. +class MyReg<string n, list<Register> subregs = []> +  : Register<n> { +  let SubRegs = subregs; +} + +class MyClass<int size, list<ValueType> types, dag registers> +  : RegisterClass<"Test", types, size, registers> { +  let Size = size; +} + +def sub0 : SubRegIndex<16>; +def sub1 : SubRegIndex<16, 16>; +def S0 : MyReg<"s0">; +def S1 : MyReg<"s1">; +def SRegs : MyClass<16, [i16], (sequence "S%u", 0, 1)>; + +let SubRegIndices = [sub0, sub1] in { +def D0 : MyReg<"d0", [S0, S1]>; +} + +def DRegs : MyClass<32, [i32], (sequence "D%u", 0, 0)>; +def SOP : RegisterOperand<SRegs>; +def DOP : RegisterOperand<DRegs>; +def SOME_INSN : I<(outs DRegs:$dst), (ins DOP:$src), []>; +def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>; + +// CHECK: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// CHECK-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_SEXT, +// CHECK-NEXT: // MIs[0] dst +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/Test::DRegsRegClassID, +// CHECK-NEXT: // MIs[0] src +// CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s16, +// CHECK-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/Test::SRegsRegClassID, +// CHECK-NEXT: // (sext:{ *:[i32] } SOP:{ *:[i16] }:$src)  =>  (REG_SEQUENCE:{ *:[i32] } DRegs:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub0:{ *:[i32] }, (SUBSOME_INSN:{ *:[i16] } SOP:{ *:[i16] }:$src), sub1:{ *:[i32] }) +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/0, /*TypeID*/GILLT_s16, +// CHECK-NEXT: GIR_MakeTempReg, /*TempRegID*/1, /*TypeID*/GILLT_s16, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/2, /*Opcode*/MyTarget::SUBSOME_INSN, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/2, /*TempRegID*/1, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/2, /*OldInsnID*/0, /*OpIdx*/1, // src +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/2, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/MyTarget::SUBSOME_INSN, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/1, +// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::REG_SEQUENCE, +// CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/1, +// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, +// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2, +// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, +// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0, +def : Pat<(i32 (sext SOP:$src)), +          (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0, +                               (SUBSOME_INSN SOP:$src), sub1)>; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 2a7be0cb821..fbf0b4bd9dd 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2296,6 +2296,7 @@ public:      OR_CopyConstantAsImm,      OR_CopyFConstantAsFPImm,      OR_Imm, +    OR_SubRegIndex,      OR_Register,      OR_TempRegister,      OR_ComplexPattern, @@ -2610,6 +2611,28 @@ public:    }  }; +/// Adds an enum value for a subreg index to the instruction being built. +class SubRegIndexRenderer : public OperandRenderer { +protected: +  unsigned InsnID; +  const CodeGenSubRegIndex *SubRegIdx; + +public: +  SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI) +      : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {} + +  static bool classof(const OperandRenderer *R) { +    return R->getKind() == OR_SubRegIndex; +  } + +  void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override { +    Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID") +          << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex") +          << MatchTable::IntValue(SubRegIdx->EnumValue) +          << MatchTable::LineBreak; +  } +}; +  /// Adds operands by calling a renderer function supplied by the ComplexPattern  /// matcher function.  class RenderComplexPatternOperand : public OperandRenderer { @@ -2890,7 +2913,9 @@ private:  public:    MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID) -      : Ty(Ty), TempRegID(TempRegID) {} +      : Ty(Ty), TempRegID(TempRegID) { +    KnownTypes.insert(Ty); +  }    void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {      Table << MatchTable::Opcode("GIR_MakeTempReg") @@ -4163,12 +4188,9 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(    // COPY_TO_REGCLASS is just a copy with a ConstrainOperandToRegClassAction    // attached. Similarly for EXTRACT_SUBREG except that's a subregister copy. -  if (DstI->TheDef->getName() == "COPY_TO_REGCLASS") +  StringRef Name = DstI->TheDef->getName(); +  if (Name == "COPY_TO_REGCLASS" || Name == "EXTRACT_SUBREG")      DstI = &Target.getInstruction(RK.getDef("COPY")); -  else if (DstI->TheDef->getName() == "EXTRACT_SUBREG") -    DstI = &Target.getInstruction(RK.getDef("COPY")); -  else if (DstI->TheDef->getName() == "REG_SEQUENCE") -    return failedImport("Unable to emit REG_SEQUENCE");    return M.insertAction<BuildMIAction>(InsertPt, M.allocateOutputInsnID(),                                         DstI); @@ -4189,8 +4211,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(    const CodeGenInstruction *DstI = DstMIBuilder.getCGI();    CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator()); +  StringRef Name = OrigDstI->TheDef->getName(); +  unsigned ExpectedDstINumUses = Dst->getNumChildren(); +    // EXTRACT_SUBREG needs to use a subregister COPY. -  if (OrigDstI->TheDef->getName() == "EXTRACT_SUBREG") { +  if (Name == "EXTRACT_SUBREG") {      if (!Dst->getChild(0)->isLeaf())        return failedImport("EXTRACT_SUBREG child #1 is not a leaf"); @@ -4220,10 +4245,41 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(      return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");    } +  if (Name == "REG_SEQUENCE") { +    if (!Dst->getChild(0)->isLeaf()) +      return failedImport("REG_SEQUENCE child #0 is not a leaf"); + +    Record *RCDef = getInitValueAsRegClass(Dst->getChild(0)->getLeafValue()); +    if (!RCDef) +      return failedImport("REG_SEQUENCE child #0 could not " +                          "be coerced to a register class"); + +    if ((ExpectedDstINumUses - 1) % 2 != 0) +      return failedImport("Malformed REG_SEQUENCE"); + +    for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) { +      TreePatternNode *ValChild = Dst->getChild(I); +      TreePatternNode *SubRegChild = Dst->getChild(I + 1); + +      if (DefInit *SubRegInit = +              dyn_cast<DefInit>(SubRegChild->getLeafValue())) { +        CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef()); + +        auto InsertPtOrError = +            importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild); +        if (auto Error = InsertPtOrError.takeError()) +          return std::move(Error); +        InsertPt = InsertPtOrError.get(); +        DstMIBuilder.addRenderer<SubRegIndexRenderer>(SubIdx); +      } +    } + +    return InsertPt; +  } +    // Render the explicit uses.    unsigned DstINumUses = OrigDstI->Operands.size() - OrigDstI->Operands.NumDefs; -  unsigned ExpectedDstINumUses = Dst->getNumChildren(); -  if (OrigDstI->TheDef->getName() == "COPY_TO_REGCLASS") { +  if (Name == "COPY_TO_REGCLASS") {      DstINumUses--; // Ignore the class constraint.      ExpectedDstINumUses--;    }  | 

