diff options
| author | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-08-09 23:41:44 +0000 | 
|---|---|---|
| committer | Jakob Stoklund Olesen <stoklund@2pi.dk> | 2011-08-09 23:41:44 +0000 | 
| commit | 6a14dc01ff01a60d0d997b765b4dd20166e81340 (patch) | |
| tree | 7c98690aadce40bb1d6528ba2513588517a4018b /llvm | |
| parent | 92b942b1b533f0c26232b055e129cea21ae5e563 (diff) | |
| download | bcm5719-llvm-6a14dc01ff01a60d0d997b765b4dd20166e81340.tar.gz bcm5719-llvm-6a14dc01ff01a60d0d997b765b4dd20166e81340.zip  | |
Promote VMOVS to VMOVD when possible.
On Cortex-A8, we use the NEON v2f32 instructions for f32 arithmetic. For
better latency, we also send D-register copies down the NEON pipeline by
translating them to vorr instructions.
This patch promotes even S-register copies to D-register copies when
possible so they can also go down the NEON pipeline.  Example:
        vldr.32 s0, LCPI0_0
    loop:
        vorr    d1, d0, d0
    loop2:
        ...
        vadd.f32        d1, d1, d16
The copy that becomes the vorr instruction looked like this after regalloc:
    %S2<def> = COPY %S0, %D1<imp-def>
Copies involving odd S-registers and copies that don't define the full
D-register are left alone.
llvm-svn: 137182
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 31 | 
1 files changed, 29 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0987e56852d..9231e6b1e32 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -629,9 +629,36 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,    bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);    unsigned Opc; -  if (SPRDest && SPRSrc) +  if (SPRDest && SPRSrc) {      Opc = ARM::VMOVS; -  else if (GPRDest && SPRSrc) + +    // An even S-S copy may be feeding a NEON v2f32 instruction being used for +    // f32 operations.  In that case, it is better to copy the full D-regs with +    // a VMOVD since that can be converted to a NEON-domain move by +    // NEONMoveFix.cpp.  Check that MI is the original COPY instruction, and +    // that it really defines the whole D-register. +    if ((DestReg - ARM::S0) % 2 == 0 && (SrcReg - ARM::S0) % 2 == 0 && +        I != MBB.end() && I->isCopy() && +        I->getOperand(0).getReg() == DestReg && +        I->getOperand(1).getReg() == SrcReg) { +      // I is pointing to the original COPY instruction. +      // Find the parent D-registers. +      const TargetRegisterInfo *TRI = &getRegisterInfo(); +      unsigned SrcD = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, +                                               &ARM::DPRRegClass); +      unsigned DestD = TRI->getMatchingSuperReg(DestReg, ARM::ssub_0, +                                                &ARM::DPRRegClass); +      // Be careful to not clobber an INSERT_SUBREG that reads and redefines a +      // D-register.  There must be an <imp-def> of DestD, and no <imp-use>. 
+      if (I->definesRegister(DestD, TRI) && !I->readsRegister(DestD, TRI)) { +        Opc = ARM::VMOVD; +        SrcReg = SrcD; +        DestReg = DestD; +        if (KillSrc) +          KillSrc = I->killsRegister(SrcReg, TRI); +      } +    } +  } else if (GPRDest && SPRSrc)      Opc = ARM::VMOVRS;    else if (SPRDest && GPRSrc)      Opc = ARM::VMOVSR;  | 

