diff options
| author | Manman Ren <mren@apple.com> | 2012-06-26 19:47:59 +0000 | 
|---|---|---|
| committer | Manman Ren <mren@apple.com> | 2012-06-26 19:47:59 +0000 | 
| commit | a09820414ae1ac4f47e8ab7ddaac9a909634bfee (patch) | |
| tree | 9dfbcd8cdefc6a0e572a70530cab51752e22ad6e /llvm/lib | |
| parent | 727a771a5f8492665817729a84a8bc83d21f37c5 (diff) | |
| download | bcm5719-llvm-a09820414ae1ac4f47e8ab7ddaac9a909634bfee.tar.gz bcm5719-llvm-a09820414ae1ac4f47e8ab7ddaac9a909634bfee.zip  | |
X86: add GATHER intrinsics (AVX2) in LLVM
Support the following intrinsics:
llvm.x86.avx2.gather.d.pd, llvm.x86.avx2.gather.q.pd
llvm.x86.avx2.gather.d.pd.256, llvm.x86.avx2.gather.q.pd.256
llvm.x86.avx2.gather.d.ps, llvm.x86.avx2.gather.q.ps
llvm.x86.avx2.gather.d.ps.256, llvm.x86.avx2.gather.q.ps.256
Modified the disassembler to handle the VSIB (vector SIB) addressing mode.
llvm-svn: 159221
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 27 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 47 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 37 | 
7 files changed, 132 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 08c732c3886..e74aea26d0e 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {    // If we have both a base register and an index register make sure they are    // both 64-bit or 32-bit registers. +  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.    if (BaseReg != 0 && IndexReg != 0) {      if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && -        !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && +        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || +         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&          IndexReg != X86::RIZ) {        Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");        return 0;      }      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && -        !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && +        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || +         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&          IndexReg != X86::EIZ){        Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");        return 0; diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index b13a00620bb..b13e1ca41c5 100644 --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -498,7 +498,30 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,      } else {        baseReg = MCOperand::CreateReg(0);      } -     + +    // Check whether we are handling VSIB addressing mode for GATHER. 
+    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and +    // we should use SIB_INDEX_XMM4|YMM4 for VSIB. +    // I don't see a way to get the correct IndexReg in readSIB: +    //   We can tell whether it is VSIB or SIB after instruction ID is decoded, +    //   but instruction ID may not be decoded yet when calling readSIB. +    uint32_t Opcode = mcInst.getOpcode(); +    bool IsGather = (Opcode == X86::VGATHERDPDrm || +                     Opcode == X86::VGATHERQPDrm || +                     Opcode == X86::VGATHERDPSrm || +                     Opcode == X86::VGATHERQPSrm); +    bool IsGatherY = (Opcode == X86::VGATHERDPDYrm || +                      Opcode == X86::VGATHERQPDYrm || +                      Opcode == X86::VGATHERDPSYrm || +                      Opcode == X86::VGATHERQPSYrm); +    if (IsGather || IsGatherY) { +      unsigned IndexOffset = insn.sibIndex - +                         (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); +      SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; +      insn.sibIndex = (SIBIndex)(IndexBase +  +                           (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); +    } +      if (insn.sibIndex != SIB_INDEX_NONE) {        switch (insn.sibIndex) {        default: @@ -509,6 +532,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,          indexReg = MCOperand::CreateReg(X86::x); break;        EA_BASES_32BIT        EA_BASES_64BIT +      REGS_XMM +      REGS_YMM  #undef ENTRY        }      } else { diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index fae309b45d0..e2caf6a2a8b 100644 --- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -310,11 +310,14 @@ typedef enum {   * SIBIndex - All possible values of the SIB index field.   
*   Borrows entries from ALL_EA_BASES with the special case that   *   sib is synonymous with NONE. + * Vector SIB: index can be XMM or YMM.   */  typedef enum {    SIB_INDEX_NONE,  #define ENTRY(x) SIB_INDEX_##x,    ALL_EA_BASES +  REGS_XMM +  REGS_YMM  #undef ENTRY    SIB_INDEX_max  } SIBIndex; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 12f1961ed80..52506fa185f 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,        VEX_X = 0x0;      if (HasVEX_4VOp3) -      VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); +      // Instruction format for 4VOp3: +      //   src1(ModR/M), MemAddr, src3(VEX_4V) +      // CurOp points to start of the MemoryOperand, +      //   it skips TIED_TO operands if exist, then increments past src1. +      // CurOp + X86::AddrNumOperands will point to src3. +      VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);      break;    case X86II::MRM0m: case X86II::MRM1m:    case X86II::MRM2m: case X86II::MRM3m: diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d381f3da4b1..ea9e5bcf180 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -187,6 +187,7 @@ namespace {    private:      SDNode *Select(SDNode *N); +    SDNode *SelectGather(SDNode *N, unsigned Opc);      SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);      SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);      SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); @@ -1952,6 +1953,29 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {    llvm_unreachable("unrecognized size for LdVT");  } +/// SelectGather - Customized ISel for GATHER operations. 
+/// +SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { +  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale +  SDValue Chain = Node->getOperand(0); +  SDValue VSrc = Node->getOperand(2); +  SDValue Base = Node->getOperand(3); +  SDValue VIdx = Node->getOperand(4); +  SDValue VMask = Node->getOperand(5); +  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); +  assert(Scale && "Scale should be a constant for GATHER operations"); + +  // Memory Operands: Base, Scale, Index, Disp, Segment +  SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); +  SDValue Segment = CurDAG->getRegister(0, MVT::i32); +  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, +                          Disp, Segment, VMask, Chain}; +  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), +                                           VSrc.getValueType(), MVT::Other, +                                           Ops, array_lengthof(Ops)); +  return ResNode; +} +  SDNode *X86DAGToDAGISel::Select(SDNode *Node) {    EVT NVT = Node->getValueType(0);    unsigned Opc, MOpc; @@ -1967,6 +1991,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {    switch (Opcode) {    default: break; +  case ISD::INTRINSIC_W_CHAIN: { +    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); +    switch (IntNo) { +    default: break; +    case Intrinsic::x86_avx2_gather_d_pd: +      return SelectGather(Node, X86::VGATHERDPDrm); +    case Intrinsic::x86_avx2_gather_d_pd_256: +      return SelectGather(Node, X86::VGATHERDPDYrm); +    case Intrinsic::x86_avx2_gather_q_pd: +      return SelectGather(Node, X86::VGATHERQPDrm); +    case Intrinsic::x86_avx2_gather_q_pd_256: +      return SelectGather(Node, X86::VGATHERQPDYrm); +    case Intrinsic::x86_avx2_gather_d_ps: +      return SelectGather(Node, X86::VGATHERDPSrm); +    case Intrinsic::x86_avx2_gather_d_ps_256: +      return SelectGather(Node, X86::VGATHERDPSYrm); +    
case Intrinsic::x86_avx2_gather_q_ps: +      return SelectGather(Node, X86::VGATHERQPSrm); +    case Intrinsic::x86_avx2_gather_q_ps_256: +      return SelectGather(Node, X86::VGATHERQPSYrm); +    } +    break; +  }    case X86ISD::GlobalBaseReg:      return getGlobalBaseReg(); diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 9ce61409daf..0023424e660 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -325,6 +325,14 @@ def f128mem : X86MemOperand<"printf128mem"> {    let ParserMatchClass = X86Mem128AsmOperand; }  def f256mem : X86MemOperand<"printf256mem">{     let ParserMatchClass = X86Mem256AsmOperand; } +def v128mem : Operand<iPTR> { +  let PrintMethod = "printf128mem"; +  let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); +  let ParserMatchClass = X86Mem128AsmOperand; } +def v256mem : Operand<iPTR> { +  let PrintMethod = "printf256mem"; +  let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); +  let ParserMatchClass = X86Mem256AsmOperand; }  }  // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 5efb501085e..8974d453522 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7994,3 +7994,40 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;  defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;  defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;  defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; + +//===----------------------------------------------------------------------===// +// VGATHER - GATHER Operations +// +//            [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx, +//                               VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3; +//            [(set VR256:$dst, (IntGather256 VR256:$src1, 
addr:$src2, VR256:$idx, +//                               VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3; +multiclass avx2_gather<bits<8> opc, string OpcodeStr, +                       Intrinsic IntGather128, Intrinsic IntGather256> { +  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), +            (ins VR128:$src1, v128mem:$src2, VR128:$mask), +            !strconcat(OpcodeStr, +              "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), +            []>, VEX_4VOp3; +  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), +            (ins VR256:$src1, v256mem:$src2, VR256:$mask), +            !strconcat(OpcodeStr, +              "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), +            []>, VEX_4VOp3; +} + +//let Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let Constraints = "$src1 = $dst" in { +  defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", +                                int_x86_avx2_gather_d_pd, +                                int_x86_avx2_gather_d_pd_256>, VEX_W; +  defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", +                                int_x86_avx2_gather_q_pd, +                                int_x86_avx2_gather_q_pd_256>, VEX_W; +  defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", +                                int_x86_avx2_gather_d_ps, +                                int_x86_avx2_gather_d_ps_256>; +  defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", +                                int_x86_avx2_gather_q_ps, +                                int_x86_avx2_gather_q_ps_256>; +}  | 

