[Power9] Implement add-pc, multiply-add, modulo, extend-sign-shift, random number, set bool, and dfp test significance

This patch implements the following instructions: - addpcis subpcis - maddhd maddhdu maddld - modsw moduw modsd modud - darn - extswsli extswsli. - setb - dtstsfi dtstsfiq Total: 15 instructions. Reviewers: nemanjai hfinkel tjablin amehsan kbarton http://reviews.llvm.org/D17885 llvm-svn: 265505
author: Chuang-Yu Cheng <cycheng@multicorewareinc.com> 2016-04-06 01:47:02 +0000
committer: Chuang-Yu Cheng <cycheng@multicorewareinc.com> 2016-04-06 01:47:02 +0000
commit: 024a623c5599bd11839939dfed0eeecbc389201e (patch)
tree: 18d437143f33d3e35eb7128eea65234527d99caf /llvm/lib/Target/PowerPC
parent: eaf4b3d75ca523b19b4da1a329775ae988633c07 (diff)
download: bcm5719-llvm-024a623c5599bd11839939dfed0eeecbc389201e.tar.gz
bcm5719-llvm-024a623c5599bd11839939dfed0eeecbc389201e.zip
7 files changed, 269 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index c35c2b1cf2f..e177b85dbbb 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -530,6 +530,10 @@ public:
                                  (Kind == Immediate && isInt<16>(getImm()) &&
                                   (getImm() & 3) == 0); }
   bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+  bool isD8RCRegNumber() const {
+    // D8RC operands must be a 5-bit, even register id (pairs start on even).
+    return Kind == Immediate && isUInt<5>(getImm()) && (getImm() & 0x1) == 0;
+  }
   bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
   bool isCCRegNumber() const { return (Kind == Expression
                                        && isUInt<3>(getExprCRVal())) ||
@@ -592,6 +596,11 @@ public:
     Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
   }
 
+  void addRegD8RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); // via FRegs
+  }
+
   void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
@@ -1222,6 +1231,19 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
     Inst = TmpInst;
     break;
   }
+  // ISA3.0 Instructions: subpcis/lnia pseudos are rewritten as ADDPCIS.
+  case PPC::SUBPCIS:
+  case PPC::LNIA: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(PPC::ADDPCIS);
+    TmpInst.addOperand(Inst.getOperand(0));
+    if (Opcode == PPC::SUBPCIS)
+      addNegOperand(TmpInst, Inst.getOperand(1), getContext());
+    else
+      TmpInst.addOperand(MCOperand::createImm(0)); // lnia: immediate 0
+    Inst = TmpInst;
+    break;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 6ea4fb1bfbc..35ff3b991d5 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -242,6 +242,12 @@ static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
   return decodeRegisterClass(Inst, RegNo, FRegs);
 }
 
+static DecodeStatus DecodeD8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, FRegs); // same table as F8RC
+}
+
 static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                             uint64_t Address,
                                             const void *Decoder) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index a5b9ba3aa14..2d5506e7ab9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1263,6 +1263,63 @@ def : Pat<(atomic_store_64 xaddr:$ptr,  i64:$val), (STDX g8rc:$val, memrr:$ptr)>
 
 let Predicates = [IsISA3_0] in {
 
+// VA-Form with one g8rc result and three g8rc inputs: [PO RT RA RB RC XO]
+class VA_RT5_RA5_RB5_RC5<bits<6> xo, string opc, InstrItinClass itin,
+                         list<dag> pattern>
+  : VAForm_1a<xo, (outs g8rc:$rD), (ins g8rc:$rA, g8rc:$rB, g8rc:$rC),
+              !strconcat(opc, " $rD, $rA, $rB, $rC"), itin, pattern>;
+
+// 64-bit Fixed-Point Multiply-Add High-DWord/High-DWord-Unsigned/Low-DWord
+def MADDHD  : VA_RT5_RA5_RB5_RC5<48, "maddhd" , IIC_IntGeneral, []>;
+def MADDHDU : VA_RT5_RA5_RB5_RC5<49, "maddhdu", IIC_IntGeneral, []>;
+def MADDLD  : VA_RT5_RA5_RB5_RC5<51, "maddld" , IIC_IntGeneral, []>;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+// Add PC Immediate Shifted (g8rc twin of ADDPCIS, codegen-only)
+def ADDPCIS8 : DX_RD5_IM16<19, 2, (outs g8rc:$rD), (ins s16imm:$IMM),
+                           "addpcis $rD, $IMM", IIC_IntSimple, []>;
+
+// Modulo {Signed/Unsigned}-Word (g8rc twins of MODSW/MODUW, codegen-only)
+def MODSW8 : X_RT5_RA5_RB5<779, "modsw", g8rc, IIC_IntDivW, []>;
+def MODUW8 : X_RT5_RA5_RB5<267, "moduw", g8rc, IIC_IntDivW, []>;
+}
+
+// Modulo {Signed/Unsigned}-DWord. NOTE(review): IIC_IntDivW on dword ops?
+def MODSD  : X_RT5_RA5_RB5<777, "modsd", g8rc, IIC_IntDivW, []>;
+def MODUD  : X_RT5_RA5_RB5<265, "modud", g8rc, IIC_IntDivW, []>;
+
+// XS-Form [PO RS RA sh XO sh Rc]: defines NAME and the dot form NAMEo (CR0)
+multiclass XS_RS5_RA5_SH5r<bits<6> opcode, bits<9> xo, string opc,
+                           InstrItinClass itin, list<dag> pattern> {
+  let BaseName = opc in {
+    def NAME : XSForm_1<opcode, xo, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+                        !strconcat(opc, " $rA, $rS, $SH"), itin, pattern>;
+    let Defs = [CR0] in
+    def o    : XSForm_1<opcode, xo, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+                        !strconcat(opc, ". $rA, $rS, $SH"), itin, pattern>,
+               isDOT;
+  }
+}
+
+// Deliver A Random Number: "darn RT, L", where L is a 2-bit immediate
+def DARN : X_RD5_L2<31, 755, (outs g8rc:$rD), (ins u2imm:$L), "darn $rD, $L",
+                    IIC_IntGeneral, []>;
+
+// Extend-Sign Word and Shift Left Immediate (extswsli and extswsli.)
+defm EXTSWSLI  : XS_RS5_RA5_SH5r<31, 445, "extswsli" , IIC_IntShift, []>;
+
+// Set Boolean: "setb RT, BFA", where BFA is given as a 3-bit immediate
+def SETB : X_RD5_BFA3<31, 128, (outs g8rc:$rD), (ins u3imm:$BFA),
+                      "setb $rD, $BFA", IIC_IntGeneral, []>;
+
+// DFP Test Significance Immediate [Quad]; BF is a 3-bit CR field (crrc)
+def DTSTSFI  : X_BF3_IM6_RS5<59, 675,
+                             (outs crrc:$BF), (ins u6imm:$UIM, f8rc:$FRB),
+                             "dtstsfi $BF, $UIM, $FRB", IIC_FPGeneral, []>;
+def DTSTSFIQ : X_BF3_IM6_RS5<63,  675,
+                             (outs crrc:$BF), (ins u6imm:$UIM, d8rc:$FRBp),
+                             "dtstsfiq $BF, $UIM, $FRBp", IIC_FPGeneral, []>;
+
 class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
                    InstrItinClass itin, list<dag> pattern>
   : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$rA, ty:$rB, u1imm:$L),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 9ab88241e6a..4a868dd8970 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -376,6 +376,23 @@ class DQ_RD6_RS5_DQ12<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
   let Inst{29-31} = xo;
 }
 
+// DX-Form: [PO RT d1 d0 XO d2]; the 16-bit IMM is split into d0, d1 and d2
+class DX_RD5_IM16<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+                  InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5>  RD;
+  bits<16> IMM;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RD;
+  // IMM = d0 || d1 || d2 (d0 holds the high 10 bits, d2 the lowest bit)
+  let Inst{11-15} = IMM{5-1};   // d1
+  let Inst{16-25} = IMM{15-6};  // d0
+  let Inst{26-30} = xo;
+  let Inst{31}    = IMM{0};     // d2
+}
+
 // 1.7.6 X-Form
 class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
                       InstrItinClass itin, list<dag> pattern>
@@ -762,6 +779,12 @@ class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = RC;
 }
 
+// X-Form [PO RT RA RB XO /] with the primary opcode fixed at 31
+class X_RT5_RA5_RB5<bits<10> xo, string opc, RegisterOperand type,
+                    InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<31, xo, (outs type:$rD), (ins type:$rA, type:$rB),
+                    !strconcat(opc, " $rD, $rA, $rB"), itin, pattern>;
+
 // e.g. [PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO]
 class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
                     string asmstr, InstrItinClass itin, list<dag> pattern>
@@ -769,6 +792,57 @@ class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
   let A = xo2;
 }
 
+// [PO RT /// L /// XO /]; the '/' (reserved) fields are encoded as zero
+class X_RD5_L2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<2> L;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RD;
+  let Inst{11-13} = 0;
+  let Inst{14-15} = L;
+  let Inst{16-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = 0;
+}
+
+// [PO RT BFA // /// XO /]; the '/' (reserved) fields are encoded as zero
+class X_RD5_BFA3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+                 InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RD;
+  bits<3> BFA;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = RD;
+  let Inst{11-13} = BFA;
+  let Inst{14-20} = 0;
+  let Inst{21-30} = xo;
+  let Inst{31}    = 0;
+}
+
+// X-Form [PO BF / UIM FRB XO /] or [PO BF / UIM FRBp XO /]; BF is 3 bits
+class X_BF3_IM6_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<3> BF;
+  bits<6> UIM;
+  bits<5> FRB;
+
+  let Pattern = pattern;
+
+  let Inst{6-8}   = BF;
+  let Inst{9}     = 0;
+  let Inst{10-15} = UIM;
+  let Inst{16-20} = FRB;
+  let Inst{21-30} = xo;
+  let Inst{31}    = 0;
+}
+
 class X_BF3_DCMX7_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
                       string asmstr, InstrItinClass itin, list<dag> pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4970e908e4d..0fb2d14333f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -423,6 +423,12 @@ def PPCRegF4RCAsmOperand : AsmOperandClass {
 def f4rc : RegisterOperand<F4RC> {
   let ParserMatchClass = PPCRegF4RCAsmOperand;
 }
+def PPCRegD8RCAsmOperand : AsmOperandClass { // even register ids only
+  let Name = "RegD8RC"; let PredicateMethod = "isD8RCRegNumber";
+}
+def d8rc : RegisterOperand<D8RC> {
+  let ParserMatchClass = PPCRegD8RCAsmOperand;
+}
 def PPCRegVRRCAsmOperand : AsmOperandClass {
   let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
 }
@@ -4188,6 +4194,17 @@ def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;
 
 let Predicates = [IsISA3_0] in {
 
+// Add PC Immediate Shifted
+def ADDPCIS : DX_RD5_IM16<19, 2, (outs gprc:$rD), (ins s16imm:$IMM),
+                          "addpcis $rD, $IMM", IIC_IntSimple, []>;
+// Extended mnemonics of ADDPCIS; the asm parser rewrites them to ADDPCIS
+def SUBPCIS : PPCAsmPseudo<"subpcis $rD, $IMM", (ins gprc:$rD, s16imm:$IMM)>;
+def LNIA    : PPCAsmPseudo<"lnia $rD",          (ins gprc:$rD)>;
+
+// Modulo {Signed/Unsigned}-Word (gprc; MODSW8/MODUW8 are the g8rc forms)
+def MODSW : X_RT5_RA5_RB5<779, "modsw", gprc, IIC_IntDivW, []>;
+def MODUW : X_RT5_RA5_RB5<267, "moduw", gprc, IIC_IntDivW, []>;
+
 // Copy-Paste Facility
 // We prefix 'CP' to COPY due to name conflict in Target.td. We also prefix to
 // PASTE for naming consistency.
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index e5f363c443c..39a34023e7c 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -18,6 +18,8 @@ def sub_un : SubRegIndex<1, 3>;
 def sub_32 : SubRegIndex<32>;
 def sub_64 : SubRegIndex<64>;
 def sub_128 : SubRegIndex<128>;
+def subreg_l64 : SubRegIndex<64, 0>;   // 64 bits wide, bit offset 0
+def subreg_h64 : SubRegIndex<64, 64>;  // 64 bits wide, bit offset 64
 }
 
 
@@ -49,6 +51,14 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
   let HWEncoding{4-0} = num;
 }
 
+// DPR - One of the 16 128-bit paired floating-point registers.
+// It is composed of FPR_even and FPR_odd and takes the even FPR's encoding.
+class DPR<FPR even, FPR odd, string n> : PPCReg<n> {
+  let HWEncoding = even.HWEncoding;
+  let SubRegs = [even, odd];
+  let SubRegIndices = [subreg_l64, subreg_h64];
+}
+
 // QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
 class QFPR<FPR SubReg, string n> : PPCReg<n> {
   let HWEncoding = SubReg.HWEncoding;
@@ -116,6 +126,13 @@ foreach Index = 0-31 in {
                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
 }
 
+// 128-bit paired FP registers: D<n> = (F<2n>, F<2n+1>), asm name f<2n>
+foreach Index = 0-15 in {
+  def D#Index :
+    DPR<!cast<FPR>("F"#!shl(Index, 1)),
+        !cast<FPR>("F"#!add(!shl(Index, 1), 1)), "f"#!shl(Index, 1)>;
+}
+
 // Floating-point vector subregisters (for VSX)
 foreach Index = 0-31 in {
   def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
@@ -288,6 +305,9 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
                                                 (sequence "F%u", 31, 14))>;
 def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
 
+def D8RC : RegisterClass<"PPC", [f128], 128, (add (sequence "D%u",  0, 6),
+                                                  (sequence "D%u", 15, 7))>;
+
 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
                          (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
                              V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt
index 3fd4ab9fca6..77729cad027 100644
--- a/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@@ -575,6 +575,79 @@ Move to CR from XER Extended (mcrxrx):
 - Is there a use for this in LLVM?
 
 Fixed Point Facility:
+- Add PC Immediate Shifted: addpcis subpcis
+  . Could this be used for PC-relative addressing?
+
+- 64-bit Fixed-Point Multiply-Add Low DWord: maddld
+  . SDAG:
+    (set i64:$rD, (add (mul $rA, $rB), $rC))
+
+- 64-bit Fixed-Point Multiply-Add High-DWord/High-DWord-Unsigned: maddhd maddhdu
+  . Use intrinsic:
+    (set i64:$rD, (int_ppc_maddhd i64:$rA, i64:$rB, i64:$rC))
+    (set i64:$rD, (int_ppc_maddhdu i64:$rA, i64:$rB, i64:$rC))
+
+- Modulo {Signed/Unsigned}-{Word/DWord}: modsw moduw modsd modud
+  . Map modulo signed to llvm srem, modulo unsigned to urem because each pair
+    has same semantics, as follows:
+
+    llvm srem:
+    1. This instruction returns the remainder of a division (where the result is
+       either zero or has the same sign as the dividend, op1)
+    2. Undefined behavior:
+       - <anything> % 0
+       - Overflow: e.g. -2147483648 % -1
+         In this case, the remainder doesn’t actually overflow, but this rule
+         lets srem be implemented using instructions that return both the result
+         of the division and the remainder.
+
+    Modulo Signed:
+    1. remainder = dividend - (quotient × divisor)
+        where
+                    0 ≤ remainder < |divisor|, if the dividend ≥ 0
+           -|divisor| < remainder ≤  0       , if the dividend < 0
+
+    2. Undefined behavior:
+      - <anything> % 0
+      - 0x8000_0000 % -1
+
+  . SDAG:
+    (set i32:$rD, (srem i32:$rA, i32:$rB))  // modsw
+    (set i32:$rD, (urem i32:$rA, i32:$rB))  // moduw
+    (set i64:$rD, (srem i64:$rA, i64:$rB))  // modsd
+    (set i64:$rD, (urem i64:$rA, i64:$rB))  // modud
+
+  . Note:
+    The quotient is not supplied as a result in modulo word (32-bit)
+    instructions
+
+- Deliver A Random Number: darn
+  . Intrinsic?
+    (set i64:$rD, (int_ppc_darn i2:$L))
+
+  . Consider using it to implement C/C++ rand()
+
+- Extend-Sign Word and Shift Left Immediate: extswsli extswsli.
+  . SDAG:
+    (set i64:$rA, (shl (i64 (sext i32:$rS)), i6:$SH))
+
+- Set Boolean: setb
+  . Could be used for code such as:
+
+    if (cond)
+      return true;
+    return false;
+
+  . Need Intrinsic?
+    (set i64:$rD, (int_ppc_setb i3:$BFA))
+
+- DFP Test Significance Immediate [Quad]: dtstsfi dtstsfiq
+  . Need to write inline assembly to test paired floating-point register
+    allocation for dtstsfiq
+
+  . Intrinsics:
+    (set i1:$BF, (int_ppc_dtstsfi i6:$UIM, f64:$FRB))
+    (set i1:$BF, (int_ppc_dtstsfiq i6:$UIM, f128:$FRBp))
 
 - Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
   . Use instrinstics:
author	Chuang-Yu Cheng <cycheng@multicorewareinc.com>	2016-04-06 01:47:02 +0000
committer	Chuang-Yu Cheng <cycheng@multicorewareinc.com>	2016-04-06 01:47:02 +0000
commit	024a623c5599bd11839939dfed0eeecbc389201e (patch)
tree	18d437143f33d3e35eb7128eea65234527d99caf /llvm/lib/Target/PowerPC
parent	eaf4b3d75ca523b19b4da1a329775ae988633c07 (diff)
download	bcm5719-llvm-024a623c5599bd11839939dfed0eeecbc389201e.tar.gz bcm5719-llvm-024a623c5599bd11839939dfed0eeecbc389201e.zip