-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 111
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h   |  12
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td     |  21
-rw-r--r--  llvm/test/CodeGen/X86/stdarg.ll          |  20
4 files changed, 146 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e8f4f43dc3d..15436310207 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1527,37 +1527,44 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       // Store the integer parameter registers.
       SmallVector<SDValue, 8> MemOps;
       SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
-      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
-                                  DAG.getIntPtrConstant(VarArgsGPOffset));
+      unsigned Offset = VarArgsGPOffset;
       for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+        SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+                                  DAG.getIntPtrConstant(Offset));
         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
                                      X86::GR64RegisterClass);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+                       Offset);
         MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                          DAG.getIntPtrConstant(8));
+        Offset += 8;
       }
+      if (!MemOps.empty())
+          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &MemOps[0], MemOps.size());
+
       // Now store the XMM (fp + vector) parameter registers.
-      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
-                        DAG.getIntPtrConstant(VarArgsFPOffset));
+      SmallVector<SDValue, 11> SaveXMMOps;
+      SaveXMMOps.push_back(Chain);
+
+      unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+      SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+      SaveXMMOps.push_back(ALVal);
+
+      SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
+      SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+
       for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
         unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
                                      X86::VR128RegisterClass);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
-        SDValue Store =
-          DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
-        MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                          DAG.getIntPtrConstant(16));
+        SaveXMMOps.push_back(Val);
       }
-      if (!MemOps.empty())
-          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                             &MemOps[0], MemOps.size());
+      Chain = DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, MVT::Other,
+                          &SaveXMMOps[0], SaveXMMOps.size());
     }
   }
@@ -7090,6 +7097,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::DEC:                return "X86ISD::DEC";
   case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM";
   case X86ISD::PTEST:              return "X86ISD::PTEST";
+  case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   }
 }
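A note on the LowerFormalArguments change above: the %al value it brings in
via MF.addLiveIn(X86::AL, ...) comes from the System V AMD64 calling
convention, which requires callers of varargs functions to pass the number of
vector registers used for arguments in %al. A minimal C++ sketch of that
contract from the user's side (ordinary varargs code, not part of the patch):

    #include <cstdarg>
    #include <cstdio>

    // Callee: va_start forces the prologue to spill the unused argument
    // registers into the register save area, the code this patch lowers.
    double sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      double total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, double);
      va_end(ap);
      return total;
    }

    int main() {
      // Caller: materializes "movb $2, %al" before the call, because two
      // XMM registers (xmm0, xmm1) carry the double arguments.
      std::printf("%f\n", sum(2, 1.0, 2.0));
      return 0;
    }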
@@ -7513,7 +7521,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
   F->insert(MBBIter, newMBB);
   F->insert(MBBIter, nextMBB);
 
-  // Move all successors to thisMBB to nextMBB
+  // Move all successors of thisMBB to nextMBB
   nextMBB->transferSuccessors(thisMBB);
 
   // Update thisMBB to fall through to newMBB
@@ -7585,6 +7593,73 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
   return nextMBB;
 }
 
+MachineBasicBlock *
+X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
+                                                 MachineInstr *MI,
+                                                 MachineBasicBlock *MBB) const {
+  // Emit code to save XMM registers to the stack. The ABI says that the
+  // number of registers to save is given in %al, so it's theoretically
+  // possible to do an indirect jump trick to avoid saving all of them,
+  // however this code takes a simpler approach and just executes all
+  // of the stores if %al is non-zero. It's less code, and it's probably
+  // easier on the hardware branch predictor, and stores aren't all that
+  // expensive anyway.
+
+  // Create the new basic blocks. One block contains all the XMM stores,
+  // and one block is the final destination regardless of whether any
+  // stores were performed.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction *F = MBB->getParent();
+  MachineFunction::iterator MBBIter = MBB;
+  ++MBBIter;
+  MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(MBBIter, XMMSaveMBB);
+  F->insert(MBBIter, EndMBB);
+
+  // Set up the CFG.
+  // Move any original successors of MBB to the end block.
+  EndMBB->transferSuccessors(MBB);
+  // The original block will now fall through to the XMM save block.
+  MBB->addSuccessor(XMMSaveMBB);
+  // The XMMSaveMBB will fall through to the end block.
+  XMMSaveMBB->addSuccessor(EndMBB);
+
+  // Now add the instructions.
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  unsigned CountReg = MI->getOperand(0).getReg();
+  int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
+  int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
+
+  if (!Subtarget->isTargetWin64()) {
+    // If %al is 0, branch around the XMM save block.
+    BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
+    BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB);
+    MBB->addSuccessor(EndMBB);
+  }
+
+  // In the XMM save block, save all the XMM argument registers.
+  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
+    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
+    BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
+      .addFrameIndex(RegSaveFrameIndex)
+      .addImm(/*Scale=*/1)
+      .addReg(/*IndexReg=*/0)
+      .addImm(/*Disp=*/Offset)
+      .addReg(/*Segment=*/0)
+      .addReg(MI->getOperand(i).getReg())
+      .addMemOperand(MachineMemOperand(
+                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+                       MachineMemOperand::MOStore, Offset,
+                       /*Size=*/16, /*Align=*/16));
+  }
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+
+  return EndMBB;
+}
 
 MachineBasicBlock *
 X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
@@ -7888,6 +7963,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                X86::MOV32rr, X86::MOV32rr,
                                                X86::MOV32ri, X86::MOV32ri,
                                                false);
+  case X86::VASTART_SAVE_XMM_REGS:
+    return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
   }
 }
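The expansion above reads more simply as straight-line code. A minimal C++
sketch (hypothetical names, not LLVM code) of the control flow the custom
inserter emits: MBB tests %al, XMMSaveMBB holds the stores, and EndMBB
inherits MBB's original successors.

    #include <cstring>

    struct XMMReg { unsigned char bytes[16]; };

    void vastartSaveXMMRegs(unsigned char al, char *regSaveArea,
                            unsigned fpOffset, const XMMReg *xmm, int numXMM) {
      if (al == 0)  // MBB: testb %al, %al ; je EndMBB (guard omitted on Win64)
        return;
      // XMMSaveMBB: one aligned 16-byte store (MOVAPSmr) per argument register.
      for (int i = 0; i < numXMM; ++i)
        std::memcpy(regSaveArea + fpOffset + 16 * i, xmm[i].bytes, 16);
    }  // EndMBB: execution rejoins here either way.

As the in-code comment notes, this deliberately stores all the registers
rather than dispatching on %al with an indirect jump: one test and one branch
are cheap, and the extra stores are simply dead when %al is small.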
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9e6cd819e16..3ac6e51bbb0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -243,7 +243,12 @@ namespace llvm {
       MUL_IMM,
 
       // PTEST - Vector bitwise comparisons
-      PTEST
+      PTEST,
+
+      // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
+      // according to %al. An operator is needed so that this can be expanded
+      // with control flow.
+      VASTART_SAVE_XMM_REGS
     };
   }
@@ -715,6 +720,11 @@ namespace llvm {
                                                           MachineBasicBlock *BB,
                                                         unsigned cmovOpc) const;
 
+    /// Utility function to emit the xmm reg save portion of va_start.
+    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
+                                                   MachineInstr *BInstr,
+                                                   MachineBasicBlock *BB) const;
+
     /// Emit nodes that will be selected as "test Op0,Op0", or something
     /// equivalent, for use with the given x86 condition code.
     SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ecb1b208f20..f13102640a9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
 def SDT_X86Call   : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+                                                         SDTCisVT<1, iPTR>,
+                                                         SDTCisVT<2, iPTR>]>;
+
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
 def SDTX86RdTsc   : SDTypeProfile<0, 0, []>;
@@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
                         [SDNPHasChain, SDNPOptInFlag]>;
 
+def X86vastart_save_xmm_regs :
+                 SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+                        SDT_X86VASTART_SAVE_XMM_REGS,
+                        [SDNPHasChain]>;
+
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
@@ -511,6 +520,18 @@ def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           Requires<[In32BitMode]>;
 }
 
+// x86-64 va_start lowering magic.
+let usesCustomDAGSchedInserter = 1 in
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+                              (outs),
+                              (ins GR8:$al,
+                                   i64imm:$regsavefi, i64imm:$offset,
+                                   variable_ops),
+                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+                              [(X86vastart_save_xmm_regs GR8:$al,
+                                                         imm:$regsavefi,
+                                                         imm:$offset)]>;
+
 // Nop
 let neverHasSideEffects = 1 in {
   def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
diff --git a/llvm/test/CodeGen/X86/stdarg.ll b/llvm/test/CodeGen/X86/stdarg.ll
new file mode 100644
index 00000000000..7207057729f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stdarg.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {testb	\[%\]al, \[%\]al}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define void @foo(i32 %x, ...) nounwind {
+entry:
+  %ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
+  %ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %ap12)
+  %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+  call void @bar(%struct.__va_list_tag* %ap3) nounwind
+  call void @llvm.va_end(i8* %ap12)
+  ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @bar(%struct.__va_list_tag*)
+
+declare void @llvm.va_end(i8*) nounwind
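For reference, the %struct.__va_list_tag in the test is the va_list layout
mandated by the System V AMD64 ABI. A C++ equivalent (field names taken from
the ABI document, not part of the patch), showing where VarArgsGPOffset,
VarArgsFPOffset, and the register save area built above end up:

    struct va_list_tag {
      unsigned gp_offset;       // next GPR argument's offset into reg_save_area
      unsigned fp_offset;       // next XMM argument's offset (cf. VarArgsFPOffset)
      void *overflow_arg_area;  // next argument passed on the stack
      void *reg_save_area;      // base of the area the saved registers land in
    };

The RUN line greps for "testb %al, %al" because that guard is the observable
difference this patch introduces: previously the XMM spills in the varargs
prologue were emitted unconditionally.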

