[AMDGPU] gfx1010 core wave32 changes

Differential Revision: https://reviews.llvm.org/D63204

llvm-svn: 363934
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-20 15:08:34 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2019-06-20 15:08:34 +0000
commit: 0846c125f98ba6e1013b5acd1b161bd32b395bf8 (patch)
tree: 79fc1901c99ea344863b882ce9d34bd7a5d012b3 /llvm/lib
parent: 36358cd3ed0f27af941074677e530a8c9aea8802 (diff)
download: bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.tar.gz
bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.zip
10 files changed, 56 insertions, 40 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 4ff109b7938..225268d646d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -777,7 +777,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
      FeatureLDSBankCount32,
      FeatureDLInsts,
      FeatureNSAEncoding,
-     FeatureWavefrontSize64,
+     FeatureWavefrontSize32,
      FeatureScalarStores,
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
@@ -795,7 +795,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
      FeatureDot5Insts,
      FeatureDot6Insts,
      FeatureNSAEncoding,
-     FeatureWavefrontSize64,
+     FeatureWavefrontSize32,
      FeatureScalarStores,
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
@@ -812,7 +812,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
      FeatureDot5Insts,
      FeatureDot6Insts,
      FeatureNSAEncoding,
-     FeatureWavefrontSize64,
+     FeatureWavefrontSize32,
      FeatureScalarStores,
      FeatureScalarAtomics,
      FeatureScalarFlatScratchInsts,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e6f2bd77280..4a844695549 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -50,19 +50,19 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
 def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def AMDGPUIfOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
 >;
 
 def AMDGPUElseOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
 >;
 
 def AMDGPULoopOp : SDTypeProfile<0, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
 >;
 
 def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
+  [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
 >;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6c1ef983fb7..6b3f68c7395 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -101,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
   return addOperand(Inst, MCOperand::createImm(Imm));
 }
 
+static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
+                                  uint64_t Addr, const void *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeBoolReg(Val));
+}
+
 #define DECODE_OPERAND(StaticDecoderName, DecoderName) \
 static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                        unsigned Imm, \
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 99e8da67094..f96b03dcd2c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -946,6 +946,15 @@ public:
   /// not exist. If Opcode is not a pseudo instruction, this is identity.
   int pseudoToMCOpcode(int Opcode) const;
 
+  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+                                         const TargetRegisterInfo *TRI,
+                                         const MachineFunction &MF)
+    const override {
+    if (OpNum >= TID.getNumOperands())
+      return nullptr;
+    return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
+  }
+
   void fixImplicitOperands(MachineInstr &MI) const;
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ac7777de3fb..ba9cd868643 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -766,6 +766,15 @@ def VOPDstS64orS32 : BoolRC {
   let PrintMethod = "printVOPDst";
 }
 
+// SCSrc_i1 is the operand for pseudo instructions only.
+// Boolean immediates shall not be exposed to codegen instructions.
+def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_IMM_INT32";
+  let ParserMatchClass = BoolReg;
+  let DecoderMethod = "decodeBoolReg";
+}
+
 // ===----------------------------------------------------------------------===//
 // ExpSrc* Special cases for exp src operands which are printed as
 // "off" depending on en operand.
@@ -804,11 +813,12 @@ def SDWASrc_i16 : SDWASrc<i16>;
 def SDWASrc_f32 : SDWASrc<f32>;
 def SDWASrc_f16 : SDWASrc<f16>;
 
-def SDWAVopcDst : VOPDstOperand<SReg_64> {
+def SDWAVopcDst : BoolRC {
   let OperandNamespace = "AMDGPU";
   let OperandType = "OPERAND_SDWA_VOPC_DST";
   let EncoderMethod = "getSDWAVopcDstEncoding";
   let DecoderMethod = "decodeSDWAVopcDst";
+  let PrintMethod = "printVOPDst";
 }
 
 class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@@ -940,11 +950,6 @@ def f32kimm : kimmOperand<i32>;
 def KImmFP16MatchClass : KImmMatchClass<16>;
 def f16kimm : kimmOperand<i16>;
 
-
-def VOPDstS64 : VOPDstOperand <SReg_64> {
-  let PrintMethod = "printVOPDst";
-}
-
 class FPInputModsMatchClass <int opSize> : AsmOperandClass {
   let Name = "RegOrImmWithFP"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -1237,7 +1242,7 @@ class getVALUDstForVT<ValueType VT> {
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                               !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
-                              VOPDstOperand<SReg_64>)))); // else VT == i1
+                              VOPDstS64orS32)))); // else VT == i1
 }
 
 // Returns the register class to use for the destination of VOP[12C]
@@ -1313,7 +1318,7 @@ class getVOP3SrcForVT<ValueType VT> {
            VSrc_f64,
            VSrc_b64),
         !if(!eq(VT.Value, i1.Value),
-           SCSrc_i1,
+           SSrc_i1,
            !if(isFP,
               !if(!eq(VT.Value, f16.Value),
                  VSrc_f16,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d8738a8b119..630aeeb8777 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -121,14 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
 
 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
 
-def ENTER_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins i64imm:$src0)> {
+def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
   let Defs = [EXEC];
   let hasSideEffects = 0;
   let mayLoad = 0;
   let mayStore = 0;
 }
 
-def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
+def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
   let hasSideEffects = 0;
   let mayLoad = 0;
   let mayStore = 0;
@@ -161,11 +161,11 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
 >;
 
 def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
-  (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+  (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
 >;
 
 def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
-  (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+  (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
 >;
 } // End usesCustomInserter = 1, Defs = [SCC]
 
@@ -233,30 +233,30 @@ let isTerminator = 1 in {
 let OtherPredicates = [EnableLateCFGStructurize] in {
  def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
   (outs),
-  (ins SReg_64:$vcc, brtarget:$target),
+  (ins SReg_1:$vcc, brtarget:$target),
   [(brcond i1:$vcc, bb:$target)]> {
     let Size = 12;
 }
 }
 
 def SI_IF: CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
-  [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
+  (outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
+  [(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
   let Constraints = "";
   let Size = 12;
   let hasSideEffects = 1;
 }
 
 def SI_ELSE : CFPseudoInstSI <
-  (outs SReg_64:$dst),
-  (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+  (outs SReg_1:$dst),
+  (ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
   let Size = 12;
   let hasSideEffects = 1;
 }
 
 def SI_LOOP : CFPseudoInstSI <
-  (outs), (ins SReg_64:$saved, brtarget:$target),
-  [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
+  (outs), (ins SReg_1:$saved, brtarget:$target),
+  [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
   let Size = 8;
   let isBranch = 1;
   let hasSideEffects = 1;
@@ -265,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI <
 } // End isTerminator = 1
 
 def SI_END_CF : CFPseudoInstSI <
-  (outs), (ins SReg_64:$saved),
-  [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
+  (outs), (ins SReg_1:$saved), [], 1, 1> {
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
@@ -276,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI <
 }
 
 def SI_IF_BREAK : CFPseudoInstSI <
-  (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
-  [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
+  (outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
   let Size = 4;
   let isAsCheapAsAMove = 1;
   let isReMaterializable = 1;
@@ -303,7 +301,7 @@ multiclass PseudoInstKill <dag ins> {
   }
 }
 
-defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
+defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
 defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
 
 let Defs = [EXEC,VCC] in
@@ -322,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
 }
 
 def SI_PS_LIVE : PseudoInstSI <
-  (outs SReg_64:$dst), (ins),
+  (outs SReg_1:$dst), (ins),
   [(set i1:$dst, (int_amdgcn_ps_live))]> {
   let SALU = 1;
 }
@@ -584,7 +582,7 @@ def : GCNPat<
 >;
 
 def : GCNPat<
-  (AMDGPUelse i64:$src, bb:$target),
+  (AMDGPUelse i1:$src, bb:$target),
   (SI_ELSE $src, $target, 0)
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 8df3c313616..32e3575baeb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -733,8 +733,6 @@ def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
 
 defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
 
-def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
-
 //===----------------------------------------------------------------------===//
 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index e7be776817f..1465f6ba209 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -344,7 +344,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
   let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
   let AsmDPP16 = AsmDPP#"$fi";
   let Outs32 = (outs DstRC:$vdst);
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
 }
 
 // Write out to vcc or arbitrary SGPR and read in from vcc or
@@ -358,7 +358,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
   let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
   let AsmDPP16 = AsmDPP#"$fi";
   let Outs32 = (outs DstRC:$vdst);
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
 
   // Suppress src2 implied by type since the 32-bit encoding uses an
   // implicit VCC use.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a15c0571ae7..74d1c636065 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -183,7 +183,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
   let HasModifiers = 0;
   let HasClamp = 0;
   let HasOMod = 0;
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
 }
 
@@ -203,7 +203,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
   // FIXME: Hack to stop printing _e64
   let DstRC = RegisterOperand<VReg_64>;
 
-  let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
   let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
 }
 
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index fb4370af024..b3513e383d1 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -56,7 +56,7 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
   let Asm32 = "$src0, $src1";
   // The destination for 32-bit encoding is implicit.
   let HasDst32 = 0;
-  let Outs64 = (outs VOPDstS64:$sdst);
+  let Outs64 = (outs VOPDstS64orS32:$sdst);
   list<SchedReadWrite> Schedule = sched;
 }
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2019-06-20 15:08:34 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2019-06-20 15:08:34 +0000
commit	0846c125f98ba6e1013b5acd1b161bd32b395bf8 (patch)
tree	79fc1901c99ea344863b882ce9d34bd7a5d012b3 /llvm/lib
parent	36358cd3ed0f27af941074677e530a8c9aea8802 (diff)
download	bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.tar.gz bcm5719-llvm-0846c125f98ba6e1013b5acd1b161bd32b395bf8.zip